MySQL 8.0.29
Source Code Documentation
join_optimizer.cc File Reference
#include "sql/join_optimizer/join_optimizer.h"
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include <array>
#include <bitset>
#include <initializer_list>
#include <iterator>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "ft_global.h"
#include "map_helpers.h"
#include "mem_root_deque.h"
#include "my_alloc.h"
#include "my_base.h"
#include "my_bitmap.h"
#include "my_inttypes.h"
#include "my_sys.h"
#include "my_table_map.h"
#include "mysql/components/services/bits/psi_bits.h"
#include "mysql/udf_registration_types.h"
#include "mysqld_error.h"
#include "prealloced_array.h"
#include "scope_guard.h"
#include "sql/field.h"
#include "sql/filesort.h"
#include "sql/handler.h"
#include "sql/item.h"
#include "sql/item_cmpfunc.h"
#include "sql/item_func.h"
#include "sql/item_sum.h"
#include "sql/join_optimizer/access_path.h"
#include "sql/join_optimizer/bit_utils.h"
#include "sql/join_optimizer/build_interesting_orders.h"
#include "sql/join_optimizer/compare_access_paths.h"
#include "sql/join_optimizer/cost_model.h"
#include "sql/join_optimizer/estimate_selectivity.h"
#include "sql/join_optimizer/explain_access_path.h"
#include "sql/join_optimizer/find_contained_subqueries.h"
#include "sql/join_optimizer/graph_simplification.h"
#include "sql/join_optimizer/interesting_orders.h"
#include "sql/join_optimizer/interesting_orders_defs.h"
#include "sql/join_optimizer/make_join_hypergraph.h"
#include "sql/join_optimizer/node_map.h"
#include "sql/join_optimizer/online_cycle_finder.h"
#include "sql/join_optimizer/overflow_bitset.h"
#include "sql/join_optimizer/print_utils.h"
#include "sql/join_optimizer/relational_expression.h"
#include "sql/join_optimizer/subgraph_enumeration.h"
#include "sql/join_optimizer/walk_access_paths.h"
#include "sql/join_type.h"
#include "sql/key.h"
#include "sql/key_spec.h"
#include "sql/mem_root_array.h"
#include "sql/opt_costmodel.h"
#include "sql/query_options.h"
#include "sql/range_optimizer/index_range_scan_plan.h"
#include "sql/range_optimizer/internal.h"
#include "sql/range_optimizer/range_analysis.h"
#include "sql/range_optimizer/range_opt_param.h"
#include "sql/range_optimizer/range_optimizer.h"
#include "sql/range_optimizer/tree.h"
#include "sql/sql_array.h"
#include "sql/sql_base.h"
#include "sql/sql_bitmap.h"
#include "sql/sql_class.h"
#include "sql/sql_cmd.h"
#include "sql/sql_const.h"
#include "sql/sql_executor.h"
#include "sql/sql_lex.h"
#include "sql/sql_list.h"
#include "sql/sql_opt_exec_shared.h"
#include "sql/sql_optimizer.h"
#include "sql/sql_select.h"
#include "sql/sql_tmp_table.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/table_function.h"
#include "sql/temp_table_param.h"
#include "sql/uniques.h"
#include "sql/window.h"
#include "template_utils.h"

Classes

class  anonymous_namespace{join_optimizer.cc}::CostingReceiver
 CostingReceiver contains the main join planning logic, selecting access paths based on cost. More...
 
struct  anonymous_namespace{join_optimizer.cc}::CostingReceiver::AccessPathSet
 Besides the access paths for a set of nodes (see m_access_paths), AccessPathSet contains information that is common between all access paths for that set. More...
 
struct  anonymous_namespace{join_optimizer.cc}::PossibleRangeScan
 
struct  anonymous_namespace{join_optimizer.cc}::PossibleIndexMerge
 Represents a candidate index merge, ie. More...
 
struct  anonymous_namespace{join_optimizer.cc}::KeypartForRef
 

Namespaces

namespace  hypergraph
 
namespace  anonymous_namespace{join_optimizer.cc}
 

Functions

string anonymous_namespace{join_optimizer.cc}::PrintAccessPath (const AccessPath &path, const JoinHypergraph &graph, const char *description_for_trace)
 
AccessPathanonymous_namespace{join_optimizer.cc}::CreateMaterializationPath (THD *thd, JOIN *join, AccessPath *path, TABLE *temp_table, Temp_table_param *temp_table_param, bool copy_items)
 Sets up an access path for materializing the results returned from a path in a temporary table. More...
 
AccessPathanonymous_namespace{join_optimizer.cc}::GetSafePathToSort (THD *thd, JOIN *join, AccessPath *path, bool need_rowid)
 
SecondaryEngineFlags anonymous_namespace{join_optimizer.cc}::EngineFlags (const THD *thd)
 Lists the current secondary engine flags in use. More...
 
secondary_engine_modify_access_path_cost_t anonymous_namespace{join_optimizer.cc}::SecondaryEngineCostHook (const THD *thd)
 Gets the secondary storage engine cost modification function, if any. More...
 
Item_func_matchanonymous_namespace{join_optimizer.cc}::GetSargableFullTextPredicate (const Predicate &predicate)
 Returns the MATCH function of a predicate that can be pushed down to a full-text index. More...
 
void anonymous_namespace{join_optimizer.cc}::FindAppliedAndSubsumedPredicatesForRangeScan (THD *thd, KEY *key, unsigned used_key_parts, unsigned num_exact_key_parts, TABLE *table, OverflowBitset tree_applied_predicates, OverflowBitset tree_subsumed_predicates, const JoinHypergraph &graph, OverflowBitset *applied_predicates_out, OverflowBitset *subsumed_predicates_out)
 
bool anonymous_namespace{join_optimizer.cc}::CollectPossibleRangeScans (THD *thd, SEL_TREE *tree, RANGE_OPT_PARAM *param, OverflowBitset tree_applied_predicates, OverflowBitset tree_subsumed_predicates, const JoinHypergraph &graph, Mem_root_array< PossibleRangeScan > *possible_scans)
 
double anonymous_namespace{join_optimizer.cc}::EstimateOutputRowsFromRangeTree (THD *thd, const RANGE_OPT_PARAM &param, ha_rows total_rows, const Mem_root_array< PossibleRangeScan > &possible_scans, const JoinHypergraph &graph, OverflowBitset predicates, string *trace)
 Based on estimates for all the different range scans (which cover different but potentially overlapping combinations of predicates), try to find an estimate for the number of rows scanning the given table, with all predicates applied. More...
 
AccessPathanonymous_namespace{join_optimizer.cc}::FindCheapestIndexRangeScan (THD *thd, SEL_TREE *tree, RANGE_OPT_PARAM *param, bool prefer_clustered_primary_key_scan, bool *inexact)
 From a collection of index scans, find the single cheapest one and generate an AccessPath for it. More...
 
int anonymous_namespace{join_optimizer.cc}::WasPushedDownToRef (Item *condition, const KeypartForRef *keyparts, unsigned num_keyparts)
 
bool anonymous_namespace{join_optimizer.cc}::ContainsSubqueries (Item *item_arg)
 
bool anonymous_namespace{join_optimizer.cc}::IsSubsumableFullTextPredicate (Item_func *condition)
 
bool anonymous_namespace{join_optimizer.cc}::IsLimitHintPushableToFullTextSearch (const Item_func_match *match, const JoinHypergraph &graph, uint64_t fulltext_predicates)
 
NodeMap anonymous_namespace{join_optimizer.cc}::FindReachableTablesFrom (NodeMap tables, const JoinHypergraph &graph)
 Find the set of tables we can join directly against, given that we have the given set of tables on one of the sides (effectively the same concept as DPhyp's “neighborhood”). More...
 
bool anonymous_namespace{join_optimizer.cc}::PartiallyResolvedParameterization (NodeMap parameter_tables, NodeMap other_side)
 
bool anonymous_namespace{join_optimizer.cc}::DisallowParameterizedJoinPath (AccessPath *left_path, AccessPath *right_path, NodeMap left, NodeMap right, NodeMap left_reachable, NodeMap right_reachable)
 Decide whether joining the two given paths would create a disallowed parameterized path. More...
 
AccessPathanonymous_namespace{join_optimizer.cc}::DeduplicateForSemijoin (THD *thd, AccessPath *path, Item **semijoin_group, int semijoin_group_size)
 Build an access path that deduplicates its input on a certain grouping. More...
 
uint32_t anonymous_namespace{join_optimizer.cc}::AddFlag (uint32_t flags, FuzzyComparisonResult flag)
 
bool anonymous_namespace{join_optimizer.cc}::HasFlag (uint32_t flags, FuzzyComparisonResult flag)
 
PathComparisonResult CompareAccessPaths (const LogicalOrderings &orderings, const AccessPath &a, const AccessPath &b, OrderingSet obsolete_orderings)
 
AccessPath anonymous_namespace{join_optimizer.cc}::MakeSortPathWithoutFilesort (THD *thd, AccessPath *child, ORDER *order, int ordering_state, int num_where_predicates)
 
bool anonymous_namespace{join_optimizer.cc}::CheckSupportedQuery (THD *thd)
 
AccessPathanonymous_namespace{join_optimizer.cc}::CreateMaterializationOrStreamingPath (THD *thd, JOIN *join, AccessPath *path, bool need_rowid, bool copy_items)
 Set up an access path for streaming or materializing through a temporary table. More...
 
bool anonymous_namespace{join_optimizer.cc}::IsMaterializationPath (const AccessPath *path)
 
void anonymous_namespace{join_optimizer.cc}::CollectItemsToMaterializeForFullTextAggregation (Item *root, mem_root_deque< Item * > *items)
 Goes through an item tree and collects all the sub-items that should be materialized if full-text search is used in combination with sort-based aggregation. More...
 
bool anonymous_namespace{join_optimizer.cc}::CreateTemporaryTableForFullTextFunctions (THD *thd, Query_block *query_block, TABLE **temp_table, Temp_table_param **temp_table_param)
 Creates a temporary table which materializes the results of all full-text functions that need to be accessible after aggregation. More...
 
void anonymous_namespace{join_optimizer.cc}::FindUpdateDeleteTargetTables (const THD *thd, Query_block *query_block, table_map *target_tables, table_map *immediate_candidates)
 Finds all the target tables of an UPDATE or DELETE statement. More...
 
NodeMap anonymous_namespace{join_optimizer.cc}::FindFullTextSearchedTables (const JoinHypergraph &graph)
 
bool anonymous_namespace{join_optimizer.cc}::IsSargableFullTextIndexPredicate (Item *condition)
 
uint64_t anonymous_namespace{join_optimizer.cc}::FindSargableFullTextPredicates (const JoinHypergraph &graph)
 
void anonymous_namespace{join_optimizer.cc}::EnableFullTextCoveringIndexes (const Query_block *query_block)
 
bool anonymous_namespace{join_optimizer.cc}::HasEqRefWithCache (AccessPath *path)
 Does this path contain an EQ_REF path which has caching enabled? More...
 
AccessPath anonymous_namespace{join_optimizer.cc}::CreateStreamingAggregationPath (THD *thd, AccessPath *path, const Query_block *query_block, bool rollup, TABLE *table, Temp_table_param *param)
 Creates an AGGREGATE AccessPath, possibly with an intermediary STREAM node if one is needed. More...
 
void anonymous_namespace{join_optimizer.cc}::SplitHavingCondition (THD *thd, Item *cond, Item **having_cond, Item **having_cond_wf)
 
void anonymous_namespace{join_optimizer.cc}::ApplyHavingCondition (THD *thd, Item *having_cond, Query_block *query_block, const char *description_for_trace, string *trace, Prealloced_array< AccessPath *, 4 > *root_candidates, CostingReceiver *receiver)
 
JoinHypergraph::NodeFindNodeWithTable (JoinHypergraph *graph, TABLE *table)
 
Prealloced_array< AccessPath *, 4 > ApplyDistinctAndOrder (THD *thd, const CostingReceiver &receiver, const LogicalOrderings &orderings, bool aggregation_is_unordered, int order_by_ordering_idx, int distinct_ordering_idx, const Mem_root_array< SortAheadOrdering > &sort_ahead_orderings, FunctionalDependencySet fd_set, Query_block *query_block, bool need_rowid, bool force_sort_rowids, Prealloced_array< AccessPath *, 4 > root_candidates, string *trace)
 
static AccessPathApplyWindow (THD *thd, AccessPath *root_path, Window *window, JOIN *join, bool need_rowid_for_window)
 
static int FindBestOrderingForWindow (JOIN *join, const LogicalOrderings &orderings, FunctionalDependencySet fd_set, const Mem_root_array< SortAheadOrdering > &sort_ahead_orderings, Bounds_checked_array< bool > finished_windows, Bounds_checked_array< bool > tmp_buffer, int first_ordering_idx, int second_ordering_idx, Bounds_checked_array< bool > included_windows)
 Find the ordering that allows us to process the most unprocessed windows. More...
 
static Prealloced_array< AccessPath *, 4 > ApplyWindowFunctions (THD *thd, const CostingReceiver &receiver, const LogicalOrderings &orderings, FunctionalDependencySet fd_set, bool aggregation_is_unordered, int order_by_ordering_idx, int distinct_ordering_idx, const JoinHypergraph &graph, const Mem_root_array< SortAheadOrdering > &sort_ahead_orderings, Query_block *query_block, int num_where_predicates, bool need_rowid, Prealloced_array< AccessPath *, 4 > root_candidates, string *trace)
 Apply window functions. More...
 
static void PossiblyAddSargableCondition (THD *thd, Item *item, TABLE *force_table, int predicate_index, bool is_join_condition, JoinHypergraph *graph, string *trace)
 Find out whether “item” is a sargable condition; if so, add it to: More...
 
void FindSargablePredicates (THD *thd, string *trace, JoinHypergraph *graph)
 
static bool ComesFromSameMultiEquality (Item *cond1, Item_func_eq *cond2)
 
static void CacheCostInfoForJoinConditions (THD *thd, const Query_block *query_block, JoinHypergraph *graph, string *trace)
 For each edge, cache some information for each of its join conditions. More...
 
AccessPathFindBestQueryPlan (THD *thd, Query_block *query_block, string *trace)
 Find the lowest-cost plan (which hopefully is also the cheapest to execute) of all the legal ways to execute the query. More...
 

Function Documentation

◆ ApplyDistinctAndOrder()

Prealloced_array< AccessPath *, 4 > ApplyDistinctAndOrder ( THD thd,
const CostingReceiver &  receiver,
const LogicalOrderings orderings,
bool  aggregation_is_unordered,
int  order_by_ordering_idx,
int  distinct_ordering_idx,
const Mem_root_array< SortAheadOrdering > &  sort_ahead_orderings,
FunctionalDependencySet  fd_set,
Query_block query_block,
bool  need_rowid,
bool  force_sort_rowids,
Prealloced_array< AccessPath *, 4 >  root_candidates,
string *  trace 
)

◆ ApplyWindow()

static AccessPath * ApplyWindow ( THD thd,
AccessPath root_path,
Window window,
JOIN join,
bool  need_rowid_for_window 
)
static

◆ ApplyWindowFunctions()

static Prealloced_array< AccessPath *, 4 > ApplyWindowFunctions ( THD thd,
const CostingReceiver &  receiver,
const LogicalOrderings orderings,
FunctionalDependencySet  fd_set,
bool  aggregation_is_unordered,
int  order_by_ordering_idx,
int  distinct_ordering_idx,
const JoinHypergraph graph,
const Mem_root_array< SortAheadOrdering > &  sort_ahead_orderings,
Query_block query_block,
int  num_where_predicates,
bool  need_rowid,
Prealloced_array< AccessPath *, 4 >  root_candidates,
string *  trace 
)
static

Apply window functions.

Ordering of window functions is a tricky topic. We can apply window functions in any order that we'd like, but we would like to do as few sorts as possible. In its most general form, this would entail solving an instance of the traveling salesman problem (TSP), and although the number of windows is typically small (one or two in most queries), this can blow up for large numbers of windows.

Thankfully, window functions never add or remove rows. We also assume that all sorts are equally expensive (which isn't really true, as ones on more columns take more processing time and buffer, but it's close enough in practice), and we also ignore the fact that as we compute more buffers, the temporary tables and sort buffers will get more columns. These assumptions, combined with some reasonable assumptions about ordering transitivity (if an ordering A is more sorted than an ordering B, and B > C, then also A > C – the only thing that can disturb this is groupings, which we ignore for the sake of simplicity), mean that we need to care only about the number of sorts, and can do them greedily. Thus, at any point, we pick the ordering that allows us to process the largest number of windows, process them, remove them from consideration, and repeat until there are none left.

There is one more ordering complication; after windowing, we may have DISTINCT and/or ORDER BY, which may also benefit from groupings/orderings we leave after the last window. Thus, first of all, we see if there's an ordering that can satisfy them (ideally both if possible) and at least one window; if so, we save that ordering and those windows for last.

Temporary tables are set up in FinalizePlanForQueryBlock(). This is so that it is easier to have multiple different orderings for the temporary table parameters later.

◆ CacheCostInfoForJoinConditions()

static void CacheCostInfoForJoinConditions ( THD thd,
const Query_block query_block,
JoinHypergraph graph,
string *  trace 
)
static

For each edge, cache some information for each of its join conditions.

This reduces work when repeatedly applying these join conditions later on. In particular, FindContainedSubqueries() contains a large amount of virtual function calls that we would like to avoid doing every time we consider a given join.

◆ ComesFromSameMultiEquality()

static bool ComesFromSameMultiEquality ( Item cond1,
Item_func_eq cond2 
)
static

◆ CompareAccessPaths()

PathComparisonResult CompareAccessPaths ( const LogicalOrderings orderings,
const AccessPath a,
const AccessPath b,
OrderingSet  obsolete_orderings 
)

◆ FindBestOrderingForWindow()

static int FindBestOrderingForWindow ( JOIN join,
const LogicalOrderings orderings,
FunctionalDependencySet  fd_set,
const Mem_root_array< SortAheadOrdering > &  sort_ahead_orderings,
Bounds_checked_array< bool >  finished_windows,
Bounds_checked_array< bool >  tmp_buffer,
int  first_ordering_idx,
int  second_ordering_idx,
Bounds_checked_array< bool >  included_windows 
)
static

Find the ordering that allows us to process the most unprocessed windows.

If specified, we can also demand that the ordering satisfies one or two later orderings (for DISTINCT and/or ORDER BY).

Our priorities are, in strict order:

  1. Satisfying both DISTINCT and ORDER BY (if both are given; but see below).
  2. Satisfying the first operation after windowing (which is either DISTINCT or ORDER BY).
  3. Satisfying as many windows as possible.
  4. The shortest possible ordering (as a tie-breaker).

If first_ordering_idx is given, #2 is mandatory. #4 is so that we don't get strange situations where the user specifies e.g. OVER (ORDER BY i) and we choose an ordering i,j,k,l,... because it happened to be given somewhere else.

Note that normally, it is very hard to satisfy DISTINCT for a window function, because generally, it isn't constant for a given input (by nature, it also depends on other rows). But it can happen if the window frame is static; see main.window_functions_interesting_orders.

Parameters
joinContains the list of windows.
orderingsLogical orderings in the query block.
sort_ahead_orderingsCandidate orderings to consider.
fd_setActive functional dependencies.
finished_windowsWindows to ignore.
tmp_bufferTemporary space for keeping the best list of windows so far; must be as large as the number of values.
first_ordering_idxThe first ordering after the query block that we need to satisfy (-1 if none).
second_ordering_idxThe second ordering after the query block that we would like to satisfy (-1 if none).
[out]included_windowsWhich windows can be sorted using the given ordering.
Returns
An index into sort_ahead_orderings, or -1 if no ordering could be found that sorts at least one window (plus, if first_ordering_idx is set, follows that ordering).

◆ FindBestQueryPlan()

AccessPath * FindBestQueryPlan ( THD thd,
Query_block query_block,
string *  trace 
)

Find the lowest-cost plan (which hopefully is also the cheapest to execute) of all the legal ways to execute the query.

The overall order of operations is largely dictated by the standard:

  1. All joined tables, including join predicates.
  2. WHERE predicates (we push these down into #1 where allowed)
  3. GROUP BY (it is sometimes possible to push this down into #1, but we don't have the functionality to do so).
  4. HAVING.
  5. Window functions.
  6. DISTINCT.
  7. ORDER BY.
  8. LIMIT.
  9. SQL_BUFFER_RESULT (a MySQL extension).

The place where we have the most leeway by far is #1, which is why this part of the optimizer is generally called the join optimizer (there are potentially billions of different join orderings, whereas each of the other steps, except windowing, can only be done in one or two ways). But the various outputs of #1 can have different properties, that can make for higher or lower costs in the other steps. (For instance, LIMIT will affect candidates with different init_cost differently, and ordering properties can skip sorting in ORDER BY entirely.) Thus, we allow keeping multiple candidates in play at every step if they are meaningfully different, and only pick out the winning candidate based on cost at the very end.

◆ FindNodeWithTable()

JoinHypergraph::Node * FindNodeWithTable ( JoinHypergraph graph,
TABLE table 
)

◆ FindSargablePredicates()

void FindSargablePredicates ( THD thd,
string *  trace,
JoinHypergraph graph 
)

◆ PossiblyAddSargableCondition()

static void PossiblyAddSargableCondition ( THD thd,
Item item,
TABLE force_table,
int  predicate_index,
bool  is_join_condition,
JoinHypergraph graph,
string *  trace 
)
static

Find out whether “item” is a sargable condition; if so, add it to:

  • The list of sargable predicate for the tables (hypergraph nodes) the condition touches. For a regular condition, this will typically be one table; for a join condition, it will typically be two. If “force_table” is non-nullptr, only that table will be considered (this is used for join conditions, to ensure that we do not push down predicates that cannot, e.g. to the outer side of left joins).
  • The graph's global list of predicates, if it is not already present (predicate_index = -1). This will never happen for WHERE conditions, only for join conditions.