#include "sql/range_optimizer/group_index_skip_scan_plan.h"
#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>
#include <algorithm>
#include <limits>
#include "mem_root_deque.h"
#include "my_bitmap.h"
#include "my_compiler.h"
#include "my_dbug.h"
#include "mysql/strings/m_ctype.h"
#include "mysql/udf_registration_types.h"
#include "sql/field.h"
#include "sql/handler.h"
#include "sql/item.h"
#include "sql/item_cmpfunc.h"
#include "sql/item_func.h"
#include "sql/item_sum.h"
#include "sql/join_optimizer/access_path.h"
#include "sql/join_optimizer/bit_utils.h"
#include "sql/key.h"
#include "sql/key_spec.h"
#include "sql/opt_costmodel.h"
#include "sql/opt_statistics.h"
#include "sql/opt_trace.h"
#include "sql/opt_trace_context.h"
#include "sql/parser_yystype.h"
#include "sql/range_optimizer/group_index_skip_scan.h"
#include "sql/range_optimizer/index_range_scan_plan.h"
#include "sql/range_optimizer/internal.h"
#include "sql/range_optimizer/path_helpers.h"
#include "sql/range_optimizer/range_opt_param.h"
#include "sql/range_optimizer/tree.h"
#include "sql/sql_bitmap.h"
#include "sql/sql_class.h"
#include "sql/sql_lex.h"
#include "sql/sql_list.h"
#include "sql/sql_optimizer.h"
#include "sql/sql_select.h"
#include "sql/table.h"
#include "sql/visible_fields.h"
#include "sql_string.h"
#include "template_utils.h"

Functions
static bool	add_range (MEM_ROOT return_mem_root, SEL_ARG sel_range, uint key_length, Quick_ranges *range_array)

void	trace_basic_info_group_index_skip_scan (THD thd, const AccessPath path, const RANGE_OPT_PARAM , Opt_trace_object trace_object)

static uint	get_field_keypart (KEY index, const Field field)

static bool	check_key_infix (SEL_ROOT index_range_tree, KEY_PART_INFO first_non_group_part, KEY_PART_INFO min_max_arg_part, KEY_PART_INFO last_part, uint key_infix_len, KEY_PART_INFO first_non_infix_part, uint infix_factor, KEY *index_info)

static bool	check_group_min_max_predicates (Item cond, Item_field min_max_arg_item, Field::imagetype image_type)

static bool	min_max_inspect_cond_for_fields (Item cond, Item_field min_max_arg_item, bool min_max_arg_present, bool non_min_max_arg_present)
	This function detects the presents of MIN/MAX field along with at least one non MIN/MAX field participation in the given condition. More...

static void	cost_group_skip_scan (TABLE table, uint key, uint used_key_parts, uint group_key_parts, SEL_TREE range_tree, ha_rows quick_prefix_records, bool have_min, bool have_max, uint infix_factor, Cost_estimate cost_est, ha_rows records, bool single_group)

void	collect_group_skip_scans (THD thd, RANGE_OPT_PARAM param, SEL_TREE tree, enum_order order_direction, bool skip_records_in_range, Mem_root_array< GroupIndexSkipScanInfo > *possible_group_skip_scans, bool &have_min, bool &have_max)
	Analyze indexes to see if a group index skip scan is possible and save GroupIndexSkipScanInfo data for every possible group index skip scan. More...

GroupIndexSkipScanInfo *	select_best_group_skip_scan (Mem_root_array< GroupIndexSkipScanInfo * > *possible_group_skip_scans)
	Iterates over list of group skip scan candidates and returns pointer to candidate with lowest cost. More...

AccessPath *	make_group_skip_scan_path (THD thd, RANGE_OPT_PARAM param, SEL_TREE tree, GroupIndexSkipScanInfo group_skip_scan_info, double cost_est, bool have_min, bool have_max)
	Build a GROUP_INDEX_SKIP_SCAN AccessPath based on scan info. More...

AccessPath *	get_best_group_skip_scan (THD thd, RANGE_OPT_PARAM param, SEL_TREE *tree, enum_order order_direction, bool skip_records_in_range, double cost_est)
	Test if this access method is applicable to a GROUP query with MIN/MAX functions, and if so, construct a new AccessPath. More...

Mem_root_array< AccessPath * >	get_all_group_skip_scans (THD thd, RANGE_OPT_PARAM param, SEL_TREE *tree, enum_order order_direction, bool skip_records_in_range, double cost_est)
	Test if group index skip scan is applicable and if so, construct a new AccessPath for every candidate group index skip scan. More...

static void	util_min_max_inspect_item (Item item_field, Item_field min_max_arg_item, bool min_max_arg_present, bool non_min_max_arg_present)
	Utility function used by min_max_inspect_cond_for_fields() for comparing FIELD item with given MIN/MAX item and setting appropriate out parameter. More...

void	dbug_dump_group_index_skip_scan (int indent, bool, const AccessPath *path)

Function Documentation

◆ add_range()

static bool add_range	(	MEM_ROOT *	return_mem_root,
		SEL_ARG *	sel_range,
		uint	key_length,
		Quick_ranges *	range_array
	)

static

◆ check_group_min_max_predicates()

static bool check_group_min_max_predicates	(	Item *	cond,
		Item_field *	min_max_arg_item,
		Field::imagetype	image_type
	)

static

◆ check_key_infix()

static bool check_key_infix	(	SEL_ROOT *	index_range_tree,
		KEY_PART_INFO *	first_non_group_part,
		KEY_PART_INFO *	min_max_arg_part,
		KEY_PART_INFO *	last_part,
		uint *	key_infix_len,
		KEY_PART_INFO **	first_non_infix_part,
		uint *	infix_factor,
		KEY *	index_info
	)

static

◆ collect_group_skip_scans()

void collect_group_skip_scans	(	THD *	thd,
		RANGE_OPT_PARAM *	param,
		SEL_TREE *	tree,
		enum_order	order_direction,
		bool	skip_records_in_range,
		Mem_root_array< GroupIndexSkipScanInfo * > *	possible_group_skip_scans,
		bool &	have_min,
		bool &	have_max
	)

Analyze indexes to see if a group index skip scan is possible and save GroupIndexSkipScanInfo data for every possible group index skip scan.

Return a list of GroupIndexSkipScanInfo* objects which can be used to build a GROUP_INDEX_SKIP_SCAN AccessPath.

Parameters

thd	Thread info
param	Range optimizer parameter
tree	Range tree generated by get_mm_tree
order_direction	The sort order the range access method must be able to provide. Three-value logic: asc/desc/don't care
skip_records_in_range	Same value as JOIN_TAB::skip_records_in_range()
possible_group_skip_scans	List to populate with candidate group skip scans
have_min	Set to true if MIN() function is present
have_max	Set to true if MAX() function is present

Test (Part of WA2): Skip loose index scan on disjunctive WHERE clause which results in null tree or merge tree.

The tree structure contains multiple disjoint trees. This happens when the WHERE clause can't be represented in a single range tree due to the disjunctive nature of it but there exists indexes to perform index merge scan.

Skip loose index scan if min_max attribute is present along with at least one other attribute in the WHERE cluse when the tree is null. There is no range tree if WHERE condition can't be represented in a single range tree and index merge is not possible.

Test Part of WA2:If there are conditions on a column C participating in MIN/MAX, those conditions must be conjunctions to all earlier keyparts. Otherwise, Loose Index Scan cannot be used.

◆ cost_group_skip_scan()

static void cost_group_skip_scan	(	TABLE *	table,
		uint	key,
		uint	used_key_parts,
		uint	group_key_parts,
		SEL_TREE *	range_tree,
		ha_rows	quick_prefix_records,
		bool	have_min,
		bool	have_max,
		uint	infix_factor,
		Cost_estimate *	cost_est,
		ha_rows *	records,
		bool	single_group
	)

static

◆ dbug_dump_group_index_skip_scan()

void dbug_dump_group_index_skip_scan	(	int	indent,
		bool	verbose,
		const AccessPath *	path
	)

◆ get_all_group_skip_scans()

Mem_root_array< AccessPath * > get_all_group_skip_scans	(	THD *	thd,
		RANGE_OPT_PARAM *	param,
		SEL_TREE *	tree,
		enum_order	order_direction,
		bool	skip_records_in_range,
		double	cost_est
	)

Test if group index skip scan is applicable and if so, construct a new AccessPath for every candidate group index skip scan.

Parameters

thd	Thread info
param	Range optimizer parameter
tree	Range tree generated by get_mm_tree
order_direction	The sort order the range access method must be able to provide. Three-value logic: asc/desc/don't care
skip_records_in_range	Same value as JOIN_TAB::skip_records_in_range()
cost_est	Best cost so far (=table/index scan time)

Return values

Mem_root_array of candidate GROUP_INDEX_SKIP_SCAN AccessPaths.

◆ get_best_group_skip_scan()

AccessPath * get_best_group_skip_scan	(	THD *	thd,
		RANGE_OPT_PARAM *	param,
		SEL_TREE *	tree,
		enum_order	order_direction,
		bool	skip_records_in_range,
		double	cost_est
	)

Test if this access method is applicable to a GROUP query with MIN/MAX functions, and if so, construct a new AccessPath.

DESCRIPTION Test whether a query can be computed via a GroupIndexSkipScanIterator. Queries computable via a GroupIndexSkipScanIterator must satisfy the following conditions: A) Table T has at least one compound index I of the form: I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]> B) Query conditions: B0. Q is over a single table T. For a single table query which is internally transformed into a multi-table query E.g. due to semijoin transformations, group skip scan can still be used for the original table specified in the query for duplicate removal. E.g. SELECT DISTINCT f1 FROM t1 IN (SELECT f1 FROM t2); In this case, with a semi-join transformation, query would look like SELECT DISTINCT f1 FROM t1 semi-join t2 ON t1.f1=t2.f1; Group skip scan can still be used for table "t1" for duplicate removal even though the query has a JOIN now. B1. The attributes referenced by Q are a subset of the attributes of I. B2. All attributes QA in Q can be divided into 3 overlapping groups:

SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is referenced by any number of MIN and/or MAX functions if present.
WA = {W_1, ..., W_p} - from the WHERE clause
GA = <G_1, ..., G_k> - from the GROUP BY clause (if any) = SA - if Q is a DISTINCT query (based on the equivalence of DISTINCT and GROUP queries.
NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in GROUP BY and not referenced by MIN/MAX functions. with the following properties specified below. B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not applicable.

SA1. There is at most one attribute in SA referenced by any number of MIN and/or MAX functions which, which if present, is denoted as C. SA2. The position of the C attribute in the index is after the last A_k. SA3. The attribute C can be referenced in the WHERE clause only in predicates of the forms:

(C {< | <= | > | >= | =} const)
(const {< | <= | > | >= | =} C)
(C between const_i and const_j)
C IS NULL
C IS NOT NULL
C != const SA4. If Q has a GROUP BY clause, there are no other aggregate functions except MIN and MAX. For queries with DISTINCT, aggregate functions are allowed. SA5. The select list in DISTINCT queries should not contain expressions. SA6. Clustered index can not be used by GROUP_MIN_MAX quick select for AGG_FUNC(DISTINCT ...) optimization because cursor position is never stored after a unique key lookup in the clustered index and further index_next/prev calls can not be used. So loose index scan optimization can not be used in this case. SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this access method is not used. For above queries MIN/MAX() aggregation has to be done at nested_loops_join (end_send_group). But with current design MIN/MAX() is always set as part of loose index scan. Because of this mismatch MIN() and MAX() values will be set incorrectly. For such queries to work we need a new interface for loose index scan. This new interface should only fetch records with min and max values and let end_send_group to do aggregation. Until then do not use loose_index_scan. GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if G_i = A_j => i = j. GA2. If Q has a DISTINCT clause, then there is a permutation of SA that forms a prefix of I. This permutation is used as the GROUP clause when the DISTINCT query is converted to a GROUP query. GA3. The attributes in GA may participate in arbitrary predicates, divided into two groups:
RNG(G_1,...,G_q ; where q <= k) is a range condition over the attributes of a prefix of GA
PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset of GA. Since P is applied to only GROUP attributes it filters some groups, and thus can be applied after the grouping. GA4. There are no expressions among G_i, just direct column references. NGA1.If in the index I there is a gap between the last GROUP attribute G_k, and the MIN/MAX attribute C, then NGA must consist of exactly the index attributes that constitute the gap. As a result there is a permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap in the index. NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of equality conditions for all NG_i of the form (NG_i = const) or (const = NG_i), such that each NG_i is referenced in exactly one conjunct. Informally, the predicates provide constants to fill the gap in the index. WA1. There are no other attributes in the WHERE clause except the ones referenced in predicates RNG, PA, PC, EQ defined above. Therefore WA is subset of (GA union NGA union C) for GA,NGA,C that pass the above tests. By transitivity then it also follows that each WA_i participates in the index I (if this was already tested for GA, NGA and C). WA2. If there is a predicate on C, then it must be in conjunction to all predicates on all earlier keyparts in I.

C) Overall query form: SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)]) FROM T WHERE [RNG(A_1,...,A_p ; where p <= k)] [AND EQ(B_1,...,B_m)] [AND PC(C)] [AND PA(A_i1,...,A_iq)] GROUP BY A_1,...,A_k [HAVING PH(A_1, ..., B_1,..., C)] where EXPR(...) is an arbitrary expression over some or all SELECT fields, or: SELECT DISTINCT A_i1,...,A_ik FROM T WHERE [RNG(A_1,...,A_p ; where p <= k)] [AND PA(A_i1,...,A_iq)];

NOTES If the current query satisfies the conditions above, and if (mem_root! = NULL), then the function constructs and returns a new AccessPath. object, that is later used to construct a new GroupIndexSkipScanIterator. If (mem_root == nullptr), then the function only tests whether the current query satisfies the conditions above, and, if so, sets is_applicable = true.

Queries with DISTINCT for which index access can be used are transformed into equivalent group-by queries of the form:

SELECT A_1,...,A_k FROM T WHERE [RNG(A_1,...,A_p ; where p <= k)] [AND PA(A_i1,...,A_iq)] GROUP BY A_1,...,A_k;

The group-by list is a permutation of the select attributes, according to their order in the index.

TODO

What happens if the query groups by the MIN/MAX field, and there is no other field as in: "select min(a) from t1 group by a" ?
We assume that the general correctness of the GROUP-BY query was checked before this point. Is this correct, or do we have to check it completely?
Lift the limitation in condition (B3), that is, make this access method applicable to ROLLUP queries.

Parameters

thd	Thread handle
param	Parameter from test_quick_select
tree	Range tree generated by get_mm_tree
order_direction	The sort order the range access method must be able to provide. Three-value logic: asc/desc/don't care
skip_records_in_range	Same value as JOIN_TAB::skip_records_in_range().
cost_est	Best cost so far (=table/index scan time)

Returns: group_skip_scan_path access path for best group skip scan

Return values

NULL	Group index skip scan not applicable or mem_root == NULL
!NULL	Group index skip scan table read plan

◆ get_field_keypart()

static uint get_field_keypart	(	KEY *	index,
		const Field *	field
	)

inlinestatic

◆ make_group_skip_scan_path()

AccessPath * make_group_skip_scan_path	(	THD *	thd,
		RANGE_OPT_PARAM *	param,
		SEL_TREE *	tree,
		GroupIndexSkipScanInfo *	group_skip_scan_info,
		double	cost_est,
		bool	have_min,
		bool	have_max
	)

Build a GROUP_INDEX_SKIP_SCAN AccessPath based on scan info.

Parameters

thd	Thread info
param	Range optimizer parameter
tree	Range tree generated by get_mm_tree
group_skip_scan_info	Info for a candidate group skip scan
cost_est	Best cost so far (=table/index scan time)
have_min	Set to true if MIN() function is present
have_max	Set to true if MAX() function is present

◆ min_max_inspect_cond_for_fields()

static bool min_max_inspect_cond_for_fields	(	Item *	cond,
		Item_field *	min_max_arg_item,
		bool *	min_max_arg_present,
		bool *	non_min_max_arg_present
	)

static

This function detects the presents of MIN/MAX field along with at least one non MIN/MAX field participation in the given condition.

Subqueries inspection is skipped as of now.

Parameters

	cond	tree (or subtree) describing all or part of the WHERE clause being analyzed.
	min_max_arg_item	The field referenced by the MIN/MAX function(s).
[out]	min_max_arg_present	This out parameter is set to true if MIN/MAX argument is present in cond.
[out]	non_min_max_arg_present	This out parameter is set to true if any field item other than MIN/MAX argument is present in cond.

Returns: TRUE if both MIN/MAX field and non MIN/MAX field is present in cond. FALSE o/w.

◆ select_best_group_skip_scan()

GroupIndexSkipScanInfo * select_best_group_skip_scan ( Mem_root_array< GroupIndexSkipScanInfo * > * possible_group_skip_scans )

Iterates over list of group skip scan candidates and returns pointer to candidate with lowest cost.

Parameters

possible_group_skip_scans List of candidate group index skip scans

Return values

best_scan Pointer to element in possible_group_skip_scans which has lowest cost.

◆ trace_basic_info_group_index_skip_scan()

void trace_basic_info_group_index_skip_scan	(	THD *	thd,
		const AccessPath *	path,
		const RANGE_OPT_PARAM *	,
		Opt_trace_object *	trace_object
	)

◆ util_min_max_inspect_item()

static void util_min_max_inspect_item	(	Item *	item_field,
		Item_field *	min_max_arg_item,
		bool *	min_max_arg_present,
		bool *	non_min_max_arg_present
	)

inlinestatic

Utility function used by min_max_inspect_cond_for_fields() for comparing FIELD item with given MIN/MAX item and setting appropriate out parameter.

Parameters

	item_field	Item field for comparison.
	min_max_arg_item	The field referenced by the MIN/MAX function(s).
[out]	min_max_arg_present	This out parameter is set to true if MIN/MAX argument is present in cond.
[out]	non_min_max_arg_present	This out parameter is set to true if any field item other than MIN/MAX argument is present in cond.

Functions

Function Documentation

◆ add_range()

◆ check_group_min_max_predicates()

◆ check_key_infix()

◆ collect_group_skip_scans()

◆ cost_group_skip_scan()

◆ dbug_dump_group_index_skip_scan()

◆ get_all_group_skip_scans()

◆ get_best_group_skip_scan()

◆ get_field_keypart()

◆ make_group_skip_scan_path()

◆ min_max_inspect_cond_for_fields()

◆ select_best_group_skip_scan()

◆ trace_basic_info_group_index_skip_scan()

◆ util_min_max_inspect_item()