MySQL 8.0.40
Source Code Documentation
group_index_skip_scan_plan.cc File Reference
#include "sql/range_optimizer/group_index_skip_scan_plan.h"
#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>
#include <algorithm>
#include <limits>
#include "m_ctype.h"
#include "mem_root_deque.h"
#include "my_bitmap.h"
#include "my_compiler.h"
#include "my_dbug.h"
#include "mysql/udf_registration_types.h"
#include "sql/field.h"
#include "sql/handler.h"
#include "sql/item.h"
#include "sql/item_cmpfunc.h"
#include "sql/item_func.h"
#include "sql/item_sum.h"
#include "sql/join_optimizer/access_path.h"
#include "sql/key.h"
#include "sql/key_spec.h"
#include "sql/opt_costmodel.h"
#include "sql/opt_statistics.h"
#include "sql/opt_trace.h"
#include "sql/opt_trace_context.h"
#include "sql/parser_yystype.h"
#include "sql/range_optimizer/group_index_skip_scan.h"
#include "sql/range_optimizer/index_range_scan_plan.h"
#include "sql/range_optimizer/internal.h"
#include "sql/range_optimizer/path_helpers.h"
#include "sql/range_optimizer/range_opt_param.h"
#include "sql/range_optimizer/tree.h"
#include "sql/sql_bitmap.h"
#include "sql/sql_class.h"
#include "sql/sql_lex.h"
#include "sql/sql_list.h"
#include "sql/sql_optimizer.h"
#include "sql/sql_select.h"
#include "sql/table.h"
#include "sql/visible_fields.h"
#include "sql_string.h"
#include "template_utils.h"

Functions

static bool add_range (MEM_ROOT *return_mem_root, SEL_ARG *sel_range, uint key_length, Quick_ranges *range_array)
 
void trace_basic_info_group_index_skip_scan (THD *thd, const AccessPath *path, const RANGE_OPT_PARAM *, Opt_trace_object *trace_object)
 
static uint get_field_keypart (KEY *index, const Field *field)
 
static bool check_key_infix (SEL_ROOT *index_range_tree, KEY_PART_INFO *first_non_group_part, KEY_PART_INFO *min_max_arg_part, KEY_PART_INFO *last_part, uint *key_infix_len, KEY_PART_INFO **first_non_infix_part, uint *infix_factor, KEY *index_info)
 
static bool check_group_min_max_predicates (Item *cond, Item_field *min_max_arg_item, Field::imagetype image_type)
 
static bool min_max_inspect_cond_for_fields (Item *cond, Item_field *min_max_arg_item, bool *min_max_arg_present, bool *non_min_max_arg_present)
 This function detects the presents of MIN/MAX field along with at least one non MIN/MAX field participation in the given condition. More...
 
static void cost_group_min_max (TABLE *table, uint key, uint used_key_parts, uint group_key_parts, SEL_TREE *range_tree, ha_rows quick_prefix_records, bool have_min, bool have_max, uint infix_factor, Cost_estimate *cost_est, ha_rows *records, bool single_group)
 
AccessPathget_best_group_min_max (THD *thd, RANGE_OPT_PARAM *param, SEL_TREE *tree, enum_order order_direction, bool skip_records_in_range, double cost_est)
 Test if this access method is applicable to a GROUP query with MIN/MAX functions, and if so, construct a new AccessPath. More...
 
static void util_min_max_inspect_item (Item *item_field, Item_field *min_max_arg_item, bool *min_max_arg_present, bool *non_min_max_arg_present)
 Utility function used by min_max_inspect_cond_for_fields() for comparing FIELD item with given MIN/MAX item and setting appropriate out parameter. More...
 
void dbug_dump_group_index_skip_scan (int indent, bool, const AccessPath *path)
 

Function Documentation

◆ add_range()

static bool add_range ( MEM_ROOT return_mem_root,
SEL_ARG sel_range,
uint  key_length,
Quick_ranges range_array 
)
static

◆ check_group_min_max_predicates()

static bool check_group_min_max_predicates ( Item cond,
Item_field min_max_arg_item,
Field::imagetype  image_type 
)
static

◆ check_key_infix()

static bool check_key_infix ( SEL_ROOT index_range_tree,
KEY_PART_INFO first_non_group_part,
KEY_PART_INFO min_max_arg_part,
KEY_PART_INFO last_part,
uint key_infix_len,
KEY_PART_INFO **  first_non_infix_part,
uint infix_factor,
KEY index_info 
)
static

◆ cost_group_min_max()

static void cost_group_min_max ( TABLE table,
uint  key,
uint  used_key_parts,
uint  group_key_parts,
SEL_TREE range_tree,
ha_rows  quick_prefix_records,
bool  have_min,
bool  have_max,
uint  infix_factor,
Cost_estimate cost_est,
ha_rows records,
bool  single_group 
)
static

◆ dbug_dump_group_index_skip_scan()

void dbug_dump_group_index_skip_scan ( int  indent,
bool  verbose,
const AccessPath path 
)

◆ get_best_group_min_max()

AccessPath * get_best_group_min_max ( THD thd,
RANGE_OPT_PARAM param,
SEL_TREE tree,
enum_order  order_direction,
bool  skip_records_in_range,
double  cost_est 
)

Test if this access method is applicable to a GROUP query with MIN/MAX functions, and if so, construct a new AccessPath.

DESCRIPTION Test whether a query can be computed via a GroupIndexSkipScanIterator. Queries computable via a GroupIndexSkipScanIterator must satisfy the following conditions: A) Table T has at least one compound index I of the form: I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]> B) Query conditions: B0. Q is over a single table T. B1. The attributes referenced by Q are a subset of the attributes of I. B2. All attributes QA in Q can be divided into 3 overlapping groups:

  • SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is referenced by any number of MIN and/or MAX functions if present.
  • WA = {W_1, ..., W_p} - from the WHERE clause
  • GA = <G_1, ..., G_k> - from the GROUP BY clause (if any) = SA - if Q is a DISTINCT query (based on the equivalence of DISTINCT and GROUP queries.
  • NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in GROUP BY and not referenced by MIN/MAX functions. with the following properties specified below. B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not applicable.

SA1. There is at most one attribute in SA referenced by any number of MIN and/or MAX functions which, which if present, is denoted as C. SA2. The position of the C attribute in the index is after the last A_k. SA3. The attribute C can be referenced in the WHERE clause only in predicates of the forms:

  • (C {< | <= | > | >= | =} const)
  • (const {< | <= | > | >= | =} C)
  • (C between const_i and const_j)
  • C IS NULL
  • C IS NOT NULL
  • C != const SA4. If Q has a GROUP BY clause, there are no other aggregate functions except MIN and MAX. For queries with DISTINCT, aggregate functions are allowed. SA5. The select list in DISTINCT queries should not contain expressions. SA6. Clustered index can not be used by GROUP_MIN_MAX quick select for AGG_FUNC(DISTINCT ...) optimization because cursor position is never stored after a unique key lookup in the clustered index and further index_next/prev calls can not be used. So loose index scan optimization can not be used in this case. SA7. If Q has both AGG_FUNC(DISTINCT ...) and MIN/MAX() functions then this access method is not used. For above queries MIN/MAX() aggregation has to be done at nested_loops_join (end_send_group). But with current design MIN/MAX() is always set as part of loose index scan. Because of this mismatch MIN() and MAX() values will be set incorrectly. For such queries to work we need a new interface for loose index scan. This new interface should only fetch records with min and max values and let end_send_group to do aggregation. Until then do not use loose_index_scan. GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if G_i = A_j => i = j. GA2. If Q has a DISTINCT clause, then there is a permutation of SA that forms a prefix of I. This permutation is used as the GROUP clause when the DISTINCT query is converted to a GROUP query. GA3. The attributes in GA may participate in arbitrary predicates, divided into two groups:
  • RNG(G_1,...,G_q ; where q <= k) is a range condition over the attributes of a prefix of GA
  • PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset of GA. Since P is applied to only GROUP attributes it filters some groups, and thus can be applied after the grouping. GA4. There are no expressions among G_i, just direct column references. NGA1.If in the index I there is a gap between the last GROUP attribute G_k, and the MIN/MAX attribute C, then NGA must consist of exactly the index attributes that constitute the gap. As a result there is a permutation of NGA, BA=<B_1,...,B_m>, that coincides with the gap in the index. NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of equality conditions for all NG_i of the form (NG_i = const) or (const = NG_i), such that each NG_i is referenced in exactly one conjunct. Informally, the predicates provide constants to fill the gap in the index. WA1. There are no other attributes in the WHERE clause except the ones referenced in predicates RNG, PA, PC, EQ defined above. Therefore WA is subset of (GA union NGA union C) for GA,NGA,C that pass the above tests. By transitivity then it also follows that each WA_i participates in the index I (if this was already tested for GA, NGA and C). WA2. If there is a predicate on C, then it must be in conjunction to all predicates on all earlier keyparts in I.

C) Overall query form: SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)]) FROM T WHERE [RNG(A_1,...,A_p ; where p <= k)] [AND EQ(B_1,...,B_m)] [AND PC(C)] [AND PA(A_i1,...,A_iq)] GROUP BY A_1,...,A_k [HAVING PH(A_1, ..., B_1,..., C)] where EXPR(...) is an arbitrary expression over some or all SELECT fields, or: SELECT DISTINCT A_i1,...,A_ik FROM T WHERE [RNG(A_1,...,A_p ; where p <= k)] [AND PA(A_i1,...,A_iq)];

NOTES If the current query satisfies the conditions above, and if (mem_root! = NULL), then the function constructs and returns a new AccessPath. object, that is later used to construct a new GroupIndexSkipScanIterator. If (mem_root == nullptr), then the function only tests whether the current query satisfies the conditions above, and, if so, sets is_applicable = true.

Queries with DISTINCT for which index access can be used are transformed into equivalent group-by queries of the form:

SELECT A_1,...,A_k FROM T WHERE [RNG(A_1,...,A_p ; where p <= k)] [AND PA(A_i1,...,A_iq)] GROUP BY A_1,...,A_k;

The group-by list is a permutation of the select attributes, according to their order in the index.

TODO

  • What happens if the query groups by the MIN/MAX field, and there is no other field as in: "select min(a) from t1 group by a" ?
  • We assume that the general correctness of the GROUP-BY query was checked before this point. Is this correct, or do we have to check it completely?
  • Lift the limitation in condition (B3), that is, make this access method applicable to ROLLUP queries.
Parameters
thdThread handle
paramParameter from test_quick_select
treeRange tree generated by get_mm_tree
cost_estBest cost so far (=table/index scan time)
order_directionThe sort order the range access method must be able to provide. Three-value logic: asc/desc/don't care
skip_records_in_rangeSame value as JOIN_TAB::skip_records_in_range().
Returns
table read plan
Return values
NULLLoose index scan not applicable or mem_root == NULL
!NULLLoose index scan table read plan

Test (Part of WA2): Skip loose index scan on disjunctive WHERE clause which results in null tree or merge tree.

The tree structure contains multiple disjoint trees. This happens when the WHERE clause can't be represented in a single range tree due to the disjunctive nature of it but there exists indexes to perform index merge scan.

Skip loose index scan if min_max attribute is present along with at least one other attribute in the WHERE cluse when the tree is null. There is no range tree if WHERE condition can't be represented in a single range tree and index merge is not possible.

Test Part of WA2:If there are conditions on a column C participating in MIN/MAX, those conditions must be conjunctions to all earlier keyparts. Otherwise, Loose Index Scan cannot be used.

◆ get_field_keypart()

static uint get_field_keypart ( KEY index,
const Field field 
)
inlinestatic

◆ min_max_inspect_cond_for_fields()

static bool min_max_inspect_cond_for_fields ( Item cond,
Item_field min_max_arg_item,
bool *  min_max_arg_present,
bool *  non_min_max_arg_present 
)
static

This function detects the presents of MIN/MAX field along with at least one non MIN/MAX field participation in the given condition.

Subqueries inspection is skipped as of now.

Parameters
condtree (or subtree) describing all or part of the WHERE clause being analyzed.
min_max_arg_itemThe field referenced by the MIN/MAX function(s).
[out]min_max_arg_presentThis out parameter is set to true if MIN/MAX argument is present in cond.
[out]non_min_max_arg_presentThis out parameter is set to true if any field item other than MIN/MAX argument is present in cond.
Returns
TRUE if both MIN/MAX field and non MIN/MAX field is present in cond. FALSE o/w.

◆ trace_basic_info_group_index_skip_scan()

void trace_basic_info_group_index_skip_scan ( THD thd,
const AccessPath path,
const RANGE_OPT_PARAM ,
Opt_trace_object trace_object 
)

◆ util_min_max_inspect_item()

static void util_min_max_inspect_item ( Item item_field,
Item_field min_max_arg_item,
bool *  min_max_arg_present,
bool *  non_min_max_arg_present 
)
inlinestatic

Utility function used by min_max_inspect_cond_for_fields() for comparing FIELD item with given MIN/MAX item and setting appropriate out parameter.

Parameters
item_fieldItem field for comparison.
min_max_arg_itemThe field referenced by the MIN/MAX function(s).
[out]min_max_arg_presentThis out parameter is set to true if MIN/MAX argument is present in cond.
[out]non_min_max_arg_presentThis out parameter is set to true if any field item other than MIN/MAX argument is present in cond.