Classes
struct	ApplyDistinctParameters
	This struct implements a builder pattern for creating paths that do DISTINCT (sort with duplicate removal) and adding them as parent of the current candidate paths (except for candidate paths that do DISTINCT already). More...

class	CostingReceiver
	CostingReceiver contains the main join planning logic, selecting access paths based on cost. More...

struct	KeypartForRef

struct	PossibleIndexMerge
	Represents a candidate index merge, ie. More...

struct	PossibleIndexSkipScan
	Represents a candidate index skip scan, i.e. More...

struct	PossibleRangeScan

struct	PossibleRORScan

class	RefAccessBuilder
	A builder class for constructing REF (or EQ_REF) AccessPath objects. More...

Typedefs
using	AccessPathArray = Prealloced_array< AccessPath *, 4 >

Functions
string	PrintAccessPath (const AccessPath &path, const JoinHypergraph &graph, const char *description_for_trace)

void	PrintJoinOrder (const AccessPath path, string join_order)
	Used by optimizer trace to print join order of join paths. More...

AccessPath *	CreateMaterializationPath (THD thd, JOIN join, AccessPath path, TABLE temp_table, Temp_table_param temp_table_param, bool copy_items, double distinct_rows, MaterializePathParameters::DedupType dedup_reason)
	Sets up an access path for materializing the results returned from a path in a temporary table. More...

int	WasPushedDownToRef (Item condition, const KeypartForRef keyparts, unsigned num_keyparts)

SecondaryEngineFlags	EngineFlags (const THD *thd)
	Lists the current secondary engine flags in use. More...

secondary_engine_modify_view_ap_cost_t	SecondaryEngineCostHook (THD *thd)
	Gets the secondary storage engine cost modification function, if any. More...

secondary_engine_check_optimizer_request_t	SecondaryEngineStateCheckHook (const THD *thd)
	Gets the secondary storage engine hypergraph state hook function, if any. More...

Item_func_match *	GetSargableFullTextPredicate (const Predicate &predicate)
	Returns the MATCH function of a predicate that can be pushed down to a full-text index. More...

bool	IsDeleteStatement (const THD *thd)
	Is the current statement a DELETE statement? More...

bool	IsUpdateStatement (const THD *thd)
	Is the current statement a DELETE statement? More...

void	SetNumOutputRowsAfterFilter (AccessPath *path, double output_rows)
	Set the number of output rows after filter for an access path to a new value. More...

bool	CheckKilledOrError (THD *thd)
	Check if the statement is killed or an error has been raised. More...

void	FindAppliedAndSubsumedPredicatesForRangeScan (THD thd, KEY key, unsigned used_key_parts, unsigned num_exact_key_parts, TABLE table, OverflowBitset tree_applied_predicates, OverflowBitset tree_subsumed_predicates, const JoinHypergraph &graph, OverflowBitset applied_predicates_out, OverflowBitset *subsumed_predicates_out)

bool	CollectPossibleRangeScans (THD thd, SEL_TREE tree, RANGE_OPT_PARAM param, OverflowBitset tree_applied_predicates, OverflowBitset tree_subsumed_predicates, const JoinHypergraph &graph, Mem_root_array< PossibleRangeScan > possible_scans)

ha_rows	EstimateRowsFromIndexMerge (THD thd, const SEL_IMERGE imerge, RANGE_OPT_PARAM *param)

double	EstimateOutputRowsFromRangeTree (THD *thd, RANGE_OPT_PARAM &param, ha_rows total_rows, const Mem_root_array< PossibleRangeScan > &possible_scans, const Mem_root_array< PossibleIndexMerge > &possible_imerges, const JoinHypergraph &graph, OverflowBitset predicates)
	Based on estimates for all the different range scans (which cover different but potentially overlapping combinations of predicates), try to find an estimate for the number of rows scanning the given table, with all predicates applied. More...

AccessPath *	FindCheapestIndexRangeScan (THD thd, SEL_TREE tree, RANGE_OPT_PARAM param, bool prefer_clustered_primary_key_scan, bool inexact, bool need_rowid_ordered_rows)
	From a collection of index scans, find the single cheapest one and generate an AccessPath for it. More...

void	UpdateAppliedAndSubsumedPredicates (const uint idx, const Mem_root_array< PossibleRORScan > &possible_ror_scans, const RANGE_OPT_PARAM param, OverflowBitset applied_predicates, OverflowBitset *subsumed_predicates)

int	GetRowIdOrdering (const TABLE table, const LogicalOrderings orderings, const Mem_root_array< ActiveIndexInfo > *active_indexes)

bool	ContainsSubqueries (Item *item_arg)

bool	HasConstantEqualityForField (const Mem_root_array< SargablePredicate > &sargable_predicates, const Field *field)
	Do we have a sargable predicate which checks if "field" is equal to a constant? More...

bool	IsSubsumableFullTextPredicate (Item_func *condition)

bool	IsLimitHintPushableToFullTextSearch (const Item_func_match *match, const JoinHypergraph &graph, uint64_t fulltext_predicates)

std::optional< double >	GetTableAfterFiltersCardinalityFromHypergraph (int node_idx, OverflowBitset applied_predicates, JoinHypergraph *graph)
	Attempt to retrieve the estimated cardinality for the given base table node (after filters have been applied) from the Hypergraph. More...

bool	LateralDependenciesAreSatisfied (int node_idx, NodeMap tables, const JoinHypergraph &graph)
	Checks if the table given by "node_idx" has all its lateral dependencies satisfied by the set of tables given by "tables". More...

NodeMap	FindReachableTablesFrom (NodeMap tables, const JoinHypergraph &graph)
	Find the set of tables we can join directly against, given that we have the given set of tables on one of the sides (effectively the same concept as DPhyp's “neighborhood”). More...

bool	CanResolveMoreParameterTables (NodeMap outer, NodeMap inner, NodeMap outer_parameters, NodeMap inner_parameters, NodeMap outer_reachable, NodeMap inner_reachable)
	Is it possible to resolve more parameter tables before performing a nested loop join between "outer" and "inner", or will the join have to be performed first? More...

bool	DisallowParameterizedJoinPath (AccessPath left_path, AccessPath right_path, NodeMap left, NodeMap right, NodeMap left_reachable, NodeMap right_reachable, bool is_reorderable)
	Decide whether joining the two given paths would create a disallowed parameterized path. More...

bool	IsEmptyJoin (const RelationalExpression::Type join_type, bool left_is_empty, bool right_is_empty)
	Checks if the result of a join is empty, given that it is known that one or both of the join legs always produces an empty result. More...

void	MoveDegenerateJoinConditionToFilter (THD thd, Query_block query_block, const JoinPredicate edge, AccessPath right_path)
	If the ON clause of a left join only references tables on the right side of the join, pushing the condition into the right side is a valid thing to do. More...

AccessPath *	DeduplicateForSemijoin (THD thd, AccessPath path, Item *semijoin_group, int semijoin_group_size, RelationalExpression expr)
	Build an access path that deduplicates its input on a certain grouping. More...

bool	IsConstantSingleRowPath (const AccessPath &path)
	Check if an access path returns at most one row, and it's constant throughout the query. More...

uint32_t	AddFlag (uint32_t flags, FuzzyComparisonResult flag)

bool	HasFlag (uint32_t flags, FuzzyComparisonResult flag)

bool	IsMaterializationPath (const AccessPath *path)

AccessPath	MakeSortPathWithoutFilesort (THD thd, AccessPath child, ORDER *order, int ordering_state)

bool	CheckSupportedQuery (THD *thd)

AccessPath *	CreateMaterializationOrStreamingPath (THD thd, JOIN join, AccessPath *path, bool need_rowid, bool copy_items)
	Set up an access path for streaming or materializing through a temporary table. More...

AccessPath *	GetSafePathToSort (THD thd, JOIN join, AccessPath *path, bool need_rowid, bool force_materialization)

bool	IsImmediateDeleteCandidate (const Table_ref table_ref, const Query_block query_block)
	Is this DELETE target table a candidate for being deleted from immediately, while scanning the result of the join? It only checks if it is a candidate for immediate delete. More...

void	AddFieldsToTmpSet (Item item, TABLE table)
	Adds all fields of "table" that are referenced from "item" to table->tmp_set. More...

bool	IsImmediateUpdateCandidate (const Table_ref *table_ref, int node_idx, const JoinHypergraph &graph, table_map target_tables)
	Is this UPDATE target table a candidate for being updated immediately, while scanning the result of the join? It only checks if it is a candidate for immediate update. More...

table_map	FindUpdateDeleteTargetTables (const Query_block *query_block)
	Finds all the target tables of an UPDATE or DELETE statement. More...

table_map	FindImmediateUpdateDeleteCandidates (const JoinHypergraph &graph, table_map target_tables, bool is_delete)
	Finds all of the target tables of an UPDATE or DELETE statement that are candidates from being updated or deleted from immediately while scanning the results of the join, without need to buffer the row IDs in a temporary table for delayed update/delete after the join has completed. More...

NodeMap	FindFullTextSearchedTables (const JoinHypergraph &graph)

bool	IsSargableFullTextIndexPredicate (Item *condition)

uint64_t	FindSargableFullTextPredicates (const JoinHypergraph &graph)

NodeMap	GetNodesUnderLimit (const JoinHypergraph &graph, const LogicalOrderings &orderings, int distinct_ordering_idx)

bool	InjectCastNodes (JoinHypergraph *graph)

void	EnableFullTextCoveringIndexes (const Query_block *query_block)

AccessPath *	CreateZeroRowsForEmptyJoin (JOIN join, const char cause)
	Creates a ZERO_ROWS access path for an always empty join result, or a ZERO_ROWS_AGGREGATED in case of an implicitly grouped query. More...

AccessPath	CreateStreamingAggregationPath (THD thd, AccessPath path, JOIN *join, olap_type olap, double row_estimate)
	Creates an AGGREGATE AccessPath, possibly with an intermediary STREAM node if one is needed. More...

bool	IsFinalPredicate (const Predicate &predicate)
	Check if a predicate in the WHERE clause should be applied after all tables have been joined together. More...

bool	SkipFinalPredicates (const AccessPathArray &candidates, const JoinHypergraph &graph)
	Can we skip the ApplyFinalPredicatesAndExpandFilters() step? More...

void	ApplyFinalPredicatesAndExpandFilters (THD thd, const CostingReceiver &receiver, const JoinHypergraph &graph, const LogicalOrderings &orderings, FunctionalDependencySet fd_set, AccessPathArray *root_candidates)

static AccessPath *	CreateTemptableAggregationPath (THD thd, Query_block query_block, AccessPath child_path, double aggregate_rows)

void	SplitHavingCondition (THD thd, Item cond, Item having_cond, Item having_cond_wf)

void	ApplyHavingOrQualifyCondition (THD thd, Item having_cond, Query_block query_block, AccessPathArray root_candidates, CostingReceiver *receiver)

JoinHypergraph::Node *	FindNodeWithTable (JoinHypergraph graph, TABLE table)

bool	ForceMaterializationBeforeSort (const Query_block &query_block, bool need_rowid)
	If we have both ORDER BY and GROUP BY, we need a materialization step after the grouping (if windowing hasn't already given us one) – although in most cases, we only need to materialize one row at a time (streaming), so the performance loss should be very slight. More...

void	SetGroupSkipScanCardinality (AccessPath *path, double output_rows)
	Set the estimated number of output rows for a group skip scan to match the estimate calculated by EstimateDistinctRows() or EstimateAggregateRows(). More...

bool	ObeysIndexOrderHints (AccessPath root_path, JOIN join, bool grouping)

AccessPathArray	ApplyOrderBy (THD *thd, const CostingReceiver &receiver, const LogicalOrderings &orderings, int order_by_ordering_idx, const Query_block &query_block, bool need_rowid, const AccessPathArray &root_candidates)
	Apply the ORDER BY clause. More...

static AccessPath *	ApplyWindow (THD thd, AccessPath root_path, Window window, JOIN join, bool need_rowid_for_window)

static int	FindBestOrderingForWindow (JOIN *join, const LogicalOrderings &orderings, FunctionalDependencySet fd_set, const Mem_root_array< SortAheadOrdering > &sort_ahead_orderings, Bounds_checked_array< bool > finished_windows, Bounds_checked_array< bool > tmp_buffer, int first_ordering_idx, int second_ordering_idx, Bounds_checked_array< bool > included_windows)
	Find the ordering that allows us to process the most unprocessed windows. More...

AccessPath *	MakeSortPathAndApplyWindows (THD thd, JOIN join, AccessPath root_path, int ordering_idx, ORDER order, const LogicalOrderings &orderings, Bounds_checked_array< bool > windows_this_iteration, FunctionalDependencySet fd_set, bool need_rowid_for_window, int single_window_idx, Bounds_checked_array< bool > finished_windows, int *num_windows_left)

bool	CheckFoundPlan (THD *thd, const AccessPathArray &candidates, bool is_secondary_engine)
	Check if at least one candidate for a valid query plan was found. More...

Typedef Documentation

◆ AccessPathArray

using anonymous_namespace{join_optimizer.cc}::AccessPathArray = typedef Prealloced_array<AccessPath *, 4>

Function Documentation

◆ AddFieldsToTmpSet()

void anonymous_namespace{join_optimizer.cc}::AddFieldsToTmpSet	(	Item *	item,
		TABLE *	table
	)

Adds all fields of "table" that are referenced from "item" to table->tmp_set.

◆ AddFlag()

uint32_t anonymous_namespace{join_optimizer.cc}::AddFlag	(	uint32_t	flags,
		FuzzyComparisonResult	flag
	)

◆ ApplyFinalPredicatesAndExpandFilters()

void anonymous_namespace{join_optimizer.cc}::ApplyFinalPredicatesAndExpandFilters	(	THD *	thd,
		const CostingReceiver &	receiver,
		const JoinHypergraph &	graph,
		const LogicalOrderings &	orderings,
		FunctionalDependencySet *	fd_set,
		AccessPathArray *	root_candidates
	)

◆ ApplyHavingOrQualifyCondition()

void anonymous_namespace{join_optimizer.cc}::ApplyHavingOrQualifyCondition	(	THD *	thd,
		Item *	having_cond,
		Query_block *	query_block,
		AccessPathArray *	root_candidates,
		CostingReceiver *	receiver
	)

◆ ApplyOrderBy()

AccessPathArray anonymous_namespace{join_optimizer.cc}::ApplyOrderBy	(	THD *	thd,
		const CostingReceiver &	receiver,
		const LogicalOrderings &	orderings,
		int	order_by_ordering_idx,
		const Query_block &	query_block,
		bool	need_rowid,
		const AccessPathArray &	root_candidates
	)

Apply the ORDER BY clause.

For each of 'root_candidates' add a parent sort path if the candidate does not have the right order already. Also, add a limit/offset path on top of that if needed.

Parameters

thd	The current thread.
receiver	The planning context.
orderings	The set of interesting orders.
order_by_ordering_idx	The order by which the result should be ordered.
query_block	The enclosing query block.
need_rowid	True if we need rowids.
root_candidates	The candidate paths.

Returns: The new root paths.

◆ ApplyWindow()

static AccessPath * anonymous_namespace{join_optimizer.cc}::ApplyWindow	(	THD *	thd,
		AccessPath *	root_path,
		Window *	window,
		JOIN *	join,
		bool	need_rowid_for_window
	)

static

◆ CanResolveMoreParameterTables()

bool anonymous_namespace{join_optimizer.cc}::CanResolveMoreParameterTables	(	NodeMap	outer,
		NodeMap	inner,
		NodeMap	outer_parameters,
		NodeMap	inner_parameters,
		NodeMap	outer_reachable,
		NodeMap	inner_reachable
	)

Is it possible to resolve more parameter tables before performing a nested loop join between "outer" and "inner", or will the join have to be performed first?

In more precise terms:

Consider the set of parameters (a set of tables) that are left unresolved after joining inner and outer. This function returns true if this set is non-empty and at least one of these unresolved parameter tables, denoted by t, can be joined directly into either outer or inner such that the result of joining either {outer, t} with {inner} or {outer} with {inner, t} would end up with more resolved parameters (fewer unresolved parameters) than simply joining {outer} and {inner}.

◆ CheckFoundPlan()

bool anonymous_namespace{join_optimizer.cc}::CheckFoundPlan	(	THD *	thd,
		const AccessPathArray &	candidates,
		bool	is_secondary_engine
	)

Check if at least one candidate for a valid query plan was found.

Raise an error if no plan was found.

Return values

false	on success (a plan was found)
true	if an error was raised (no plan found)

◆ CheckKilledOrError()

bool anonymous_namespace{join_optimizer.cc}::CheckKilledOrError ( THD * thd )

Check if the statement is killed or an error has been raised.

If it is killed, also make sure that the appropriate error is raised.

◆ CheckSupportedQuery()

bool anonymous_namespace{join_optimizer.cc}::CheckSupportedQuery ( THD * thd )

◆ CollectPossibleRangeScans()

bool anonymous_namespace{join_optimizer.cc}::CollectPossibleRangeScans	(	THD *	thd,
		SEL_TREE *	tree,
		RANGE_OPT_PARAM *	param,
		OverflowBitset	tree_applied_predicates,
		OverflowBitset	tree_subsumed_predicates,
		const JoinHypergraph &	graph,
		Mem_root_array< PossibleRangeScan > *	possible_scans
	)

◆ ContainsSubqueries()

bool anonymous_namespace{join_optimizer.cc}::ContainsSubqueries ( Item * item_arg )

◆ CreateMaterializationOrStreamingPath()

AccessPath * anonymous_namespace{join_optimizer.cc}::CreateMaterializationOrStreamingPath	(	THD *	thd,
		JOIN *	join,
		AccessPath *	path,
		bool	need_rowid,
		bool	copy_items
	)

Set up an access path for streaming or materializing through a temporary table.

If none is needed (because earlier iterators already materialize what needs to be done), returns the path itself.

The actual temporary table will be created and filled out during finalization.

◆ CreateMaterializationPath()

AccessPath *anonymous_namespace join_optimizer anonymous_namespace{join_optimizer.cc}::cc::CreateMaterializationPath	(	THD *	thd,
		JOIN *	join,
		AccessPath *	path,
		TABLE *	temp_table,
		Temp_table_param *	temp_table_param,
		bool	copy_items,
		double *	distinct_rows,
		MaterializePathParameters::DedupType	dedup_reason
	)

Sets up an access path for materializing the results returned from a path in a temporary table.

Parameters

distinct_rows If non-null, it may contain pre-calculated number of distinct rows or number of groups; or if kUnknownRowCount, it should be calculated here and returned through this parameter.

◆ CreateStreamingAggregationPath()

AccessPath anonymous_namespace{join_optimizer.cc}::CreateStreamingAggregationPath	(	THD *	thd,
		AccessPath *	path,
		JOIN *	join,
		olap_type	olap,
		double	row_estimate
	)

Creates an AGGREGATE AccessPath, possibly with an intermediary STREAM node if one is needed.

The creation of the temporary table does not happen here, but is left for FinalizePlanForQueryBlock().

Parameters

thd	The current thread.
join	The join to which 'path' belongs.
olap	contains the GROUP BY modifier type
row_estimate	estimated number of output rows, so that we do not need to recalculate it, or kUnknownRowCount if unknown.

Returns: The AGGREGATE AccessPath.

◆ CreateTemptableAggregationPath()

static AccessPath * anonymous_namespace{join_optimizer.cc}::CreateTemptableAggregationPath	(	THD *	thd,
		Query_block *	query_block,
		AccessPath *	child_path,
		double *	aggregate_rows
	)

static

◆ CreateZeroRowsForEmptyJoin()

AccessPath * anonymous_namespace{join_optimizer.cc}::CreateZeroRowsForEmptyJoin	(	JOIN *	join,
		const char *	cause
	)

Creates a ZERO_ROWS access path for an always empty join result, or a ZERO_ROWS_AGGREGATED in case of an implicitly grouped query.

The zero rows path is wrapped in FILTER (for HAVING) or LIMIT_OFFSET paths as needed, as well as UPDATE_ROWS/DELETE_ROWS paths for UPDATE/DELETE statements.

◆ DeduplicateForSemijoin()

AccessPath * anonymous_namespace{join_optimizer.cc}::DeduplicateForSemijoin	(	THD *	thd,
		AccessPath *	path,
		Item **	semijoin_group,
		int	semijoin_group_size,
		RelationalExpression *	expr
	)

Build an access path that deduplicates its input on a certain grouping.

This is used for converting semijoins to inner joins. If the grouping is empty, all rows are the same, and we make a simple LIMIT 1 instead.

◆ DisallowParameterizedJoinPath()

bool anonymous_namespace{join_optimizer.cc}::DisallowParameterizedJoinPath	(	AccessPath *	left_path,
		AccessPath *	right_path,
		NodeMap	left,
		NodeMap	right,
		NodeMap	left_reachable,
		NodeMap	right_reachable,
		bool	is_reorderable
	)

Decide whether joining the two given paths would create a disallowed parameterized path.

Parameterized paths are disallowed if they delay joining in their parameterizations without reason (ie., they could join in a parameterization right away, but don't). This is a trick borrowed from Postgres, which essentially forces inner-join ref-lookup plans to be left-deep (since such plans never gain anything from being bushy), reducing the search space significantly without compromising plan quality.

Parameters

left_path	An access path which joins together a superset of all the tables on the left-hand side of the hyperedge for which we are creating a join.
right_path	An access path which joins together a superset of all the tables on the right-hand side of the hyperedge for which we are creating a join.
left	The set of tables joined together in "left_path".
right	The set of tables joined together in "right_path".
left_reachable	The set of tables that can be joined directly with "left_path", with no intermediate join being performed first. If a table is in this set, it is possible to construct a nested loop join between an access path accessing only that table and the access path pointed to by "left_path".
right_reachable	The set of tables that can be joined directly with "right_path", with no intermediate join being performed first. If a table is in this set, it is possible to construct a nested loop join between an access path accessing only that table and the access path pointed to by "right_path".
is_reorderable	True if the optimizer may try to construct a nested loop join between "left_path" and "right_path" in either direction. False if the optimizer will consider nested loop joins in only one direction, with "left_path" as the outer table and "right_path" as the inner table. When it is true, we disallow a parameterized join path only if it is possible to resolve more parameter tables first in both join orders. This is slightly more lenient than it has to be, as it will allow parameterized join paths with both join orders, even though one of the orders can join with a parameter table first. Since all of these joins will be parameterized on the same set of tables, this extra leniency is not believed to contribute much to the explosion of plans with different parameterizations.

◆ EnableFullTextCoveringIndexes()

void anonymous_namespace{join_optimizer.cc}::EnableFullTextCoveringIndexes ( const Query_block * query_block )

◆ EngineFlags()

SecondaryEngineFlags anonymous_namespace{join_optimizer.cc}::EngineFlags ( const THD * thd )

Lists the current secondary engine flags in use.

If there is no secondary engine, will use a default set of permissive flags suitable for non-secondary engine use.

◆ EstimateOutputRowsFromRangeTree()

double anonymous_namespace{join_optimizer.cc}::EstimateOutputRowsFromRangeTree	(	THD *	thd,
		RANGE_OPT_PARAM &	param,
		ha_rows	total_rows,
		const Mem_root_array< PossibleRangeScan > &	possible_scans,
		const Mem_root_array< PossibleIndexMerge > &	possible_imerges,
		const JoinHypergraph &	graph,
		OverflowBitset	predicates
	)

Based on estimates for all the different range scans (which cover different but potentially overlapping combinations of predicates), try to find an estimate for the number of rows scanning the given table, with all predicates applied.

The #1 priority here is to get a single estimate for all (non-parameterized) scans over this table (including non-range scans), that we can reuse for all access paths. This makes sure they are fairly compared on cost (and ordering) alone; different estimates would be nonsensical, and cause those where we happen to have lower estimates to get preferred as they are joined higher up in the tree. Obviously, however, it is also attractive to get an estimate that is as good as possible. We only really care about the total selectivity of all predicates; we don't care to adjust each individual selectivity.

[Mar07] describes an unbiased estimator that is exactly what we want, and [Hav20] demonstrates an efficient calculation method (up to about 20–25 possible predicates) of this estimator. Long-term, implementing this would be our best choice. However, the implementation is not entirely trivial:

If the selectivity estimates are not consistent (e.g. S(a AND b) < S(a)S(b)), the algorithm will fail to converge. Extra steps are needed to correct for this.
The efficient algorithm (in [Hav20]) requires a linear algebra library (for performant matrix multiplication and Cholesky decomposition).
If we have a lot of estimates, even the efficient algorithm fails to converge in time (just the answers require 2^n space), and we would need additional logic to partition the problem.

Thus, for the time being, we use an ad-hoc algorithm instead. The estimate will not be as good, but it will hopefully be on the pessimistic side (overestimating the number of rows). It goes as follows:

Pick the most-covering index (ie., the range scan that applies the most number of predicates) that does not cover any predicates we've already accounted for. If there are multiple ones, choose the least selective.
Multiply in its selectivity, and mark all the predicates it covers as accounted for. Repeat #1 and #2 for as long as possible.
For any remaining predicates, if the predicate is an OR predicate, check if the range optimizer has set up index merge information. If so, get the estimates from the index merge. Else, multiply by their existing estimate (ie., the one not coming from the range optimizer).

The hope is that in #1, we will usually prefer using selectivity information from indexes with more keyparts; e.g., it's better to use an index on (a,b) than on (a) alone, since it will take into account the correlation between predicates on a and predicates on b.

[Mar07]: Markl et al: “Consistent Selectivity Estimation Via Maximum Entropy” [Hav20]: Havenstein et al: “Fast Entropy Maximization for Selectivity Estimation of Conjunctive Predicates on CPUs and GPUs”

◆ EstimateRowsFromIndexMerge()

ha_rows anonymous_namespace{join_optimizer.cc}::EstimateRowsFromIndexMerge	(	THD *	thd,
		const SEL_IMERGE *	imerge,
		RANGE_OPT_PARAM *	param
	)

◆ FindAppliedAndSubsumedPredicatesForRangeScan()

void anonymous_namespace{join_optimizer.cc}::FindAppliedAndSubsumedPredicatesForRangeScan	(	THD *	thd,
		KEY *	key,
		unsigned	used_key_parts,
		unsigned	num_exact_key_parts,
		TABLE *	table,
		OverflowBitset	tree_applied_predicates,
		OverflowBitset	tree_subsumed_predicates,
		const JoinHypergraph &	graph,
		OverflowBitset *	applied_predicates_out,
		OverflowBitset *	subsumed_predicates_out
	)

◆ FindBestOrderingForWindow()

static int anonymous_namespace{join_optimizer.cc}::FindBestOrderingForWindow	(	JOIN *	join,
		const LogicalOrderings &	orderings,
		FunctionalDependencySet	fd_set,
		const Mem_root_array< SortAheadOrdering > &	sort_ahead_orderings,
		Bounds_checked_array< bool >	finished_windows,
		Bounds_checked_array< bool >	tmp_buffer,
		int	first_ordering_idx,
		int	second_ordering_idx,
		Bounds_checked_array< bool >	included_windows
	)

static

Find the ordering that allows us to process the most unprocessed windows.

If specified, we can also demand that the ordering satisfies one or two later orderings (for DISTINCT and/or ORDER BY).

Our priorities are, in strict order:

Satisfying both DISTINCT and ORDER BY (if both are given; but see below).
Satisfying the first operation after windowing (which is either DISTINCT or ORDER BY).
Satisfying as many windows as possible.
The shortest possible ordering (as a tie-breaker).

If first_ordering_idx is given, #2 is mandatory. #4 is so that we don't get strange situations where the user specifies e.g. OVER (ORDER BY i) and we choose an ordering i,j,k,l,... because it happened to be given somewhere else.

Note that normally, it is very hard to satisfy DISTINCT for a window function, because generally, it isn't constant for a given input (by nature, it also depends on other rows). But it can happen if the window frame is static; see main.window_functions_interesting_orders.

Parameters

	join	Contains the list of windows.
	orderings	Logical orderings in the query block.
	sort_ahead_orderings	Candidate orderings to consider.
	fd_set	Active functional dependencies.
	finished_windows	Windows to ignore.
	tmp_buffer	Temporary space for keeping the best list of windows so far; must be as large as the number of values.
	first_ordering_idx	The first ordering after the query block that we need to satisfy (-1 if none).
	second_ordering_idx	The second ordering after the query block that we would like to satisfy (-1 if none).
[out]	included_windows	Which windows can be sorted using the given ordering.

Returns: An index into sort_ahead_orderings, or -1 if no ordering could be found that sorts at least one window (plus, if first_ordering_idx is set, follows that ordering).

◆ FindCheapestIndexRangeScan()

AccessPath * anonymous_namespace{join_optimizer.cc}::FindCheapestIndexRangeScan	(	THD *	thd,
		SEL_TREE *	tree,
		RANGE_OPT_PARAM *	param,
		bool	prefer_clustered_primary_key_scan,
		bool *	inexact,
		bool	need_rowid_ordered_rows
	)

From a collection of index scans, find the single cheapest one and generate an AccessPath for it.

This is similar to CollectPossibleRangeScans(), except that this is for index merge, where we don't want to enumerate all possibilities; since we don't care about the ordering of the index (we're going to sort all of the rows to deduplicate them anyway), cost is the only interesting metric, so we only need to pick out and collect ranges for one of them. (This isn't strictly true; sometimes, it can be attractive to choose a clustered primary key, so we prefer one if we allow them. See the code about is_preferred_cpk below, and the comment on the caller. Also, see about exactness below.)

This function can probably be extended to find ROR-capable scans later (just check is_ror_scan instead of is_imerge_scan).

Note that all such scans are index-only (covering), which is reflected in the cost parameters we use.

inexact is set to true if and only if the chosen path does not reflect its predicate faithfully, and needs to be rechecked. We do not currently take into account that this may affect the cost higher up, as the difference should be small enough that we don't want the combinatorial explosion.

◆ FindFullTextSearchedTables()

NodeMap anonymous_namespace{join_optimizer.cc}::FindFullTextSearchedTables ( const JoinHypergraph & graph )

◆ FindImmediateUpdateDeleteCandidates()

table_map anonymous_namespace{join_optimizer.cc}::FindImmediateUpdateDeleteCandidates	(	const JoinHypergraph &	graph,
		table_map	target_tables,
		bool	is_delete
	)

Finds all of the target tables of an UPDATE or DELETE statement that are candidates from being updated or deleted from immediately while scanning the results of the join, without need to buffer the row IDs in a temporary table for delayed update/delete after the join has completed.

These are candidates only; the actual tables to update while scanning, if any, will be chosen based on cost during planning.

◆ FindNodeWithTable()

JoinHypergraph::Node * anonymous_namespace{join_optimizer.cc}::FindNodeWithTable	(	JoinHypergraph *	graph,
		TABLE *	table
	)

◆ FindReachableTablesFrom()

NodeMap anonymous_namespace{join_optimizer.cc}::FindReachableTablesFrom	(	NodeMap	tables,
		const JoinHypergraph &	graph
	)

Find the set of tables we can join directly against, given that we have the given set of tables on one of the sides (effectively the same concept as DPhyp's “neighborhood”).

Note that having false negatives here is fine (it will only make DisallowParameterizedJoinPath() slightly less effective), but false positives is not (it may disallow valid parameterized paths, ultimately even making LATERAL queries impossible to plan). Thus, we need to check conflict rules, and our handling of hyperedges with more than one table on the other side may also be a bit too strict (this may need adjustments when we get FULL OUTER JOIN).

If this calculation turns out to be slow, we could probably cache it in AccessPathSet, or even try to build it incrementally.

◆ FindSargableFullTextPredicates()

uint64_t anonymous_namespace{join_optimizer.cc}::FindSargableFullTextPredicates ( const JoinHypergraph & graph )

◆ FindUpdateDeleteTargetTables()

table_map anonymous_namespace{join_optimizer.cc}::FindUpdateDeleteTargetTables ( const Query_block * query_block )

Finds all the target tables of an UPDATE or DELETE statement.

It additionally disables covering index scans on the target tables, since ha_update_row() and ha_delete_row() can only be called on scans reading the full row.

◆ ForceMaterializationBeforeSort()

bool anonymous_namespace{join_optimizer.cc}::ForceMaterializationBeforeSort	(	const Query_block &	query_block,
		bool	need_rowid
	)

If we have both ORDER BY and GROUP BY, we need a materialization step after the grouping (if windowing hasn't already given us one) – although in most cases, we only need to materialize one row at a time (streaming), so the performance loss should be very slight.

This is because when filesort only really deals with fields, not values; when it is to “output” a row, it puts back the contents of the sorted table's (or tables') row buffer(s). For expressions that only depend on the current row, such as (f1 + 1), this is fine, but aggregate functions (Item_sum) depend on multiple rows, so we need a field where filesort can put back its value (and of course, subsequent readers need to read from that field instead of trying to evaluate the Item_sum). A temporary table provides just that, so we create one based on the current field list; StreamingIterator (or MaterializeIterator, if we actually need to materialize) will evaluate all the Items in turn and put their values into the temporary table's fields.

For simplicity, we materialize all items in the SELECT list, even those that are not aggregate functions. This is a tiny performance loss, but makes things simpler.

The test on join->sum_funcs is mainly to avoid having to create temporary tables in unit tests; the rationale is that if there are no aggregate functions, we also cannot sort on them, and thus, we don't get the problem. Note that we can't do this if sorting by row IDs, as AggregateIterator doesn't preserve them (doing so would probably not be worth it for something that's fairly niche).

◆ GetNodesUnderLimit()

NodeMap anonymous_namespace{join_optimizer.cc}::GetNodesUnderLimit	(	const JoinHypergraph &	graph,
		const LogicalOrderings &	orderings,
		int	distinct_ordering_idx
	)

◆ GetRowIdOrdering()

int anonymous_namespace{join_optimizer.cc}::GetRowIdOrdering	(	const TABLE *	table,
		const LogicalOrderings *	orderings,
		const Mem_root_array< ActiveIndexInfo > *	active_indexes
	)

◆ GetSafePathToSort()

AccessPath * anonymous_namespace{join_optimizer.cc}::GetSafePathToSort	(	THD *	thd,
		JOIN *	join,
		AccessPath *	path,
		bool	need_rowid,
		bool	force_materialization
	)

◆ GetSargableFullTextPredicate()

Item_func_match * anonymous_namespace{join_optimizer.cc}::GetSargableFullTextPredicate ( const Predicate & predicate )

Returns the MATCH function of a predicate that can be pushed down to a full-text index.

This can be done if the predicate is a MATCH function, or in some cases (see IsSargableFullTextIndexPredicate() for details) where the predicate is a comparison function which compares the result of MATCH with a constant. For example, predicates on this form could be pushed down to a full-text index:

WHERE MATCH (x) AGAINST ('search string') AND <more predicates>

WHERE MATCH (x) AGAINST ('search string') > 0.5 AND <more predicates>

Since full-text index scans return documents with positive scores only, an index scan can only be used if the predicate excludes negative or zero scores.

◆ GetTableAfterFiltersCardinalityFromHypergraph()

std::optional< double > anonymous_namespace{join_optimizer.cc}::GetTableAfterFiltersCardinalityFromHypergraph	(	int	node_idx,
		OverflowBitset	applied_predicates,
		JoinHypergraph *	graph
	)

Attempt to retrieve the estimated cardinality for the given base table node (after filters have been applied) from the Hypergraph.

If found, we multiply it by the selectivity of the join conditions in applied_predicates and return it.

Parameters

node_idx	Index of the base table in the nodes array.
applied_predicates	Bitmap of predicates that are already applied by means of ref access.
graph	pointer to the JoinHypergraph object.

Returns: the estimated cardinality, or std::nullopt if none.

◆ HasConstantEqualityForField()

bool anonymous_namespace{join_optimizer.cc}::HasConstantEqualityForField	(	const Mem_root_array< SargablePredicate > &	sargable_predicates,
		const Field *	field
	)

Do we have a sargable predicate which checks if "field" is equal to a constant?

◆ HasFlag()

bool anonymous_namespace{join_optimizer.cc}::HasFlag	(	uint32_t	flags,
		FuzzyComparisonResult	flag
	)

◆ InjectCastNodes()

bool anonymous_namespace{join_optimizer.cc}::InjectCastNodes ( JoinHypergraph * graph )

◆ IsConstantSingleRowPath()

bool anonymous_namespace{join_optimizer.cc}::IsConstantSingleRowPath ( const AccessPath & path )

Check if an access path returns at most one row, and it's constant throughout the query.

This includes single-row index lookups which use constant key values only, and zero-rows paths. Such access paths can be performed once per query and be cached, and are known to return at most one row, so they can safely be used on either side of a nested loop join without risk of becoming much more expensive than expected because of inaccurate row estimates.

◆ IsDeleteStatement()

bool anonymous_namespace{join_optimizer.cc}::IsDeleteStatement ( const THD * thd )

Is the current statement a DELETE statement?

◆ IsEmptyJoin()

bool anonymous_namespace{join_optimizer.cc}::IsEmptyJoin	(	const RelationalExpression::Type	join_type,
		bool	left_is_empty,
		bool	right_is_empty
	)

Checks if the result of a join is empty, given that it is known that one or both of the join legs always produces an empty result.

◆ IsFinalPredicate()

bool anonymous_namespace{join_optimizer.cc}::IsFinalPredicate ( const Predicate & predicate )

Check if a predicate in the WHERE clause should be applied after all tables have been joined together.

That is, the predicate either references no tables in this query block, or it is non-deterministic.

◆ IsImmediateDeleteCandidate()

bool anonymous_namespace{join_optimizer.cc}::IsImmediateDeleteCandidate	(	const Table_ref *	table_ref,
		const Query_block *	query_block
	)

Is this DELETE target table a candidate for being deleted from immediately, while scanning the result of the join? It only checks if it is a candidate for immediate delete.

Whether it actually ends up being deleted from immediately, depends on the plan that is chosen.

◆ IsImmediateUpdateCandidate()

bool anonymous_namespace{join_optimizer.cc}::IsImmediateUpdateCandidate	(	const Table_ref *	table_ref,
		int	node_idx,
		const JoinHypergraph &	graph,
		table_map	target_tables
	)

Is this UPDATE target table a candidate for being updated immediately, while scanning the result of the join? It only checks if it is a candidate for immediate update.

Whether it actually ends up being updated immediately, depends on the plan that is chosen.

◆ IsLimitHintPushableToFullTextSearch()

bool anonymous_namespace{join_optimizer.cc}::IsLimitHintPushableToFullTextSearch	(	const Item_func_match *	match,
		const JoinHypergraph &	graph,
		uint64_t	fulltext_predicates
	)

◆ IsMaterializationPath()

bool anonymous_namespace{join_optimizer.cc}::IsMaterializationPath ( const AccessPath * path )

◆ IsSargableFullTextIndexPredicate()

bool anonymous_namespace{join_optimizer.cc}::IsSargableFullTextIndexPredicate ( Item * condition )

◆ IsSubsumableFullTextPredicate()

bool anonymous_namespace{join_optimizer.cc}::IsSubsumableFullTextPredicate ( Item_func * condition )

◆ IsUpdateStatement()

bool anonymous_namespace{join_optimizer.cc}::IsUpdateStatement ( const THD * thd )

Is the current statement a DELETE statement?

◆ LateralDependenciesAreSatisfied()

bool anonymous_namespace{join_optimizer.cc}::LateralDependenciesAreSatisfied	(	int	node_idx,
		NodeMap	tables,
		const JoinHypergraph &	graph
	)

Checks if the table given by "node_idx" has all its lateral dependencies satisfied by the set of tables given by "tables".

◆ MakeSortPathAndApplyWindows()

AccessPath * anonymous_namespace{join_optimizer.cc}::MakeSortPathAndApplyWindows	(	THD *	thd,
		JOIN *	join,
		AccessPath *	root_path,
		int	ordering_idx,
		ORDER *	order,
		const LogicalOrderings &	orderings,
		Bounds_checked_array< bool >	windows_this_iteration,
		FunctionalDependencySet	fd_set,
		bool	need_rowid_for_window,
		int	single_window_idx,
		Bounds_checked_array< bool >	finished_windows,
		int *	num_windows_left
	)

◆ MakeSortPathWithoutFilesort()

AccessPath anonymous_namespace{join_optimizer.cc}::MakeSortPathWithoutFilesort	(	THD *	thd,
		AccessPath *	child,
		ORDER *	order,
		int	ordering_state
	)

◆ MoveDegenerateJoinConditionToFilter()

void anonymous_namespace{join_optimizer.cc}::MoveDegenerateJoinConditionToFilter	(	THD *	thd,
		Query_block *	query_block,
		const JoinPredicate **	edge,
		AccessPath **	right_path
	)

If the ON clause of a left join only references tables on the right side of the join, pushing the condition into the right side is a valid thing to do.

If such conditions are not pushed down for some reason, and are left in the ON clause, HeatWave might reject the query. This happens if the entire join condition is degenerate and only references the right side. Such conditions are most commonly seen in queries that have gone through subquery_to_derived transformation.

This limitation is worked around here by moving the degenerate join condition from the join predicate to a filter path on top of the right path. This is only done for secondary storage engines.

TODO(khatlen): If HeatWave gets capable of processing queries with such conditions, this workaround should be removed.

◆ ObeysIndexOrderHints()

bool anonymous_namespace{join_optimizer.cc}::ObeysIndexOrderHints	(	AccessPath *	root_path,
		JOIN *	join,
		bool	grouping
	)

◆ PrintAccessPath()

string anonymous_namespace{join_optimizer.cc}::PrintAccessPath	(	const AccessPath &	path,
		const JoinHypergraph &	graph,
		const char *	description_for_trace
	)

◆ PrintJoinOrder()

void anonymous_namespace join_optimizer anonymous_namespace{join_optimizer.cc}::cc::PrintJoinOrder	(	const AccessPath *	path,
		string *	join_order
	)

Used by optimizer trace to print join order of join paths.

Appends into 'join_order' a string that looks something like '(t1,(t2,t3))' where t1 is an alias of any kind of table including materialized table, and t1 is joined with (t2,t3) where (t2,t3) is another join.

◆ SecondaryEngineCostHook()

secondary_engine_modify_view_ap_cost_t anonymous_namespace{join_optimizer.cc}::SecondaryEngineCostHook ( THD * thd )

Gets the secondary storage engine cost modification function, if any.

◆ SecondaryEngineStateCheckHook()

secondary_engine_check_optimizer_request_t anonymous_namespace{join_optimizer.cc}::SecondaryEngineStateCheckHook ( const THD * thd )

Gets the secondary storage engine hypergraph state hook function, if any.

◆ SetGroupSkipScanCardinality()

void anonymous_namespace{join_optimizer.cc}::SetGroupSkipScanCardinality	(	AccessPath *	path,
		double	output_rows
	)

Set the estimated number of output rows for a group skip scan to match the estimate calculated by EstimateDistinctRows() or EstimateAggregateRows().

◆ SetNumOutputRowsAfterFilter()

void anonymous_namespace{join_optimizer.cc}::SetNumOutputRowsAfterFilter	(	AccessPath *	path,
		double	output_rows
	)

Set the number of output rows after filter for an access path to a new value.

If that value is higher than the existing estimate for the number of output rows before filter, also increase the number of output rows before filter for consistency, as a filter never adds rows.

◆ SkipFinalPredicates()

bool anonymous_namespace{join_optimizer.cc}::SkipFinalPredicates	(	const AccessPathArray &	candidates,
		const JoinHypergraph &	graph
	)

Can we skip the ApplyFinalPredicatesAndExpandFilters() step?

It is an unnecessary step if there are no FILTER access paths to expand. It's not so expensive that it's worth spending a lot of effort to find out if it can be skipped, but let's skip it if our only candidate is an EQ_REF with no filter predicates, so that we don't waste time in point selects.

◆ SplitHavingCondition()

void anonymous_namespace{join_optimizer.cc}::SplitHavingCondition	(	THD *	thd,
		Item *	cond,
		Item **	having_cond,
		Item **	having_cond_wf
	)

◆ UpdateAppliedAndSubsumedPredicates()

void anonymous_namespace{join_optimizer.cc}::UpdateAppliedAndSubsumedPredicates	(	const uint	idx,
		const Mem_root_array< PossibleRORScan > &	possible_ror_scans,
		const RANGE_OPT_PARAM *	param,
		OverflowBitset *	applied_predicates,
		OverflowBitset *	subsumed_predicates
	)

◆ WasPushedDownToRef()

int anonymous_namespace{join_optimizer.cc}::WasPushedDownToRef	(	Item *	condition,
		const KeypartForRef *	keyparts,
		unsigned	num_keyparts
	)

Classes

Typedefs

Functions

Typedef Documentation

◆ AccessPathArray

Function Documentation

◆ AddFieldsToTmpSet()

◆ AddFlag()

◆ ApplyFinalPredicatesAndExpandFilters()

◆ ApplyHavingOrQualifyCondition()

◆ ApplyOrderBy()

◆ ApplyWindow()

◆ CanResolveMoreParameterTables()

◆ CheckFoundPlan()

◆ CheckKilledOrError()

◆ CheckSupportedQuery()

◆ CollectPossibleRangeScans()

◆ ContainsSubqueries()

◆ CreateMaterializationOrStreamingPath()

◆ CreateMaterializationPath()

◆ CreateStreamingAggregationPath()

◆ CreateTemptableAggregationPath()

◆ CreateZeroRowsForEmptyJoin()

◆ DeduplicateForSemijoin()

◆ DisallowParameterizedJoinPath()

◆ EnableFullTextCoveringIndexes()

◆ EngineFlags()

◆ EstimateOutputRowsFromRangeTree()

◆ EstimateRowsFromIndexMerge()

◆ FindAppliedAndSubsumedPredicatesForRangeScan()

◆ FindBestOrderingForWindow()

◆ FindCheapestIndexRangeScan()

◆ FindFullTextSearchedTables()

◆ FindImmediateUpdateDeleteCandidates()

◆ FindNodeWithTable()

◆ FindReachableTablesFrom()

◆ FindSargableFullTextPredicates()

◆ FindUpdateDeleteTargetTables()

◆ ForceMaterializationBeforeSort()

◆ GetNodesUnderLimit()

◆ GetRowIdOrdering()

◆ GetSafePathToSort()

◆ GetSargableFullTextPredicate()

◆ GetTableAfterFiltersCardinalityFromHypergraph()

◆ HasConstantEqualityForField()

◆ HasFlag()

◆ InjectCastNodes()

◆ IsConstantSingleRowPath()

◆ IsDeleteStatement()

◆ IsEmptyJoin()

◆ IsFinalPredicate()

◆ IsImmediateDeleteCandidate()

◆ IsImmediateUpdateCandidate()

◆ IsLimitHintPushableToFullTextSearch()

◆ IsMaterializationPath()

◆ IsSargableFullTextIndexPredicate()

◆ IsSubsumableFullTextPredicate()

◆ IsUpdateStatement()

◆ LateralDependenciesAreSatisfied()

◆ MakeSortPathAndApplyWindows()

◆ MakeSortPathWithoutFilesort()

◆ MoveDegenerateJoinConditionToFilter()

◆ ObeysIndexOrderHints()

◆ PrintAccessPath()

◆ PrintJoinOrder()

◆ SecondaryEngineCostHook()

◆ SecondaryEngineStateCheckHook()

◆ SetGroupSkipScanCardinality()

◆ SetNumOutputRowsAfterFilter()

◆ SkipFinalPredicates()

◆ SplitHavingCondition()

◆ UpdateAppliedAndSubsumedPredicates()

◆ WasPushedDownToRef()