#include "my_base.h"
#include "sql/join_optimizer/access_path.h"
#include "sql/join_optimizer/relational_expression.h"
#include "sql/mem_root_array.h"

Classes
struct	FilterCost
	See EstimateFilterCost. More...

Functions
void	AddCost (THD thd, const ContainedSubquery &subquery, double num_rows, FilterCost cost)
	Used internally by EstimateFilterCost() only. More...

FilterCost	EstimateFilterCost (THD thd, double num_rows, Item condition, const Query_block *outer_query_block)
	Estimate the cost of evaluating “condition”, “num_rows” times. More...

FilterCost	EstimateFilterCost (THD *thd, double num_rows, const Mem_root_array< ContainedSubquery > &contained_subqueries)
	A cheaper overload of EstimateFilterCost() that assumes that all contained subqueries have already been extracted (ie., it skips the walking, which can be fairly expensive). More...

double	EstimateCostForRefAccess (THD thd, TABLE table, unsigned key_idx, double num_output_rows)

void	EstimateSortCost (THD thd, AccessPath path, double distinct_rows=kUnknownRowCount)
	Estimate costs and output rows for a SORT AccessPath. More...

void	EstimateMaterializeCost (THD thd, AccessPath path)

double	EstimateDistinctRows (THD thd, double child_rows, Bounds_checked_array< const Item const > terms)
	Estimate the number of rows with a distinct combination of values for 'terms'. More...

void	EstimateAggregateCost (THD thd, AccessPath path, const Query_block *query_block)
	Estimate costs and result row count for an aggregate operation. More...

void	EstimateDeleteRowsCost (AccessPath *path)

void	EstimateUpdateRowsCost (AccessPath *path)

void	EstimateStreamCost (AccessPath *path)
	Estimate the costs and row count for a STREAM AccessPath. More...

void	EstimateLimitOffsetCost (AccessPath *path)
	Estimate the costs and row count for a WINDOW AccessPath. More...

void	EstimateWindowCost (AccessPath *path)
	Estimate the costs and row count for a WINDOW AccessPath. More...

double	EstimateSemijoinFanOut (THD *thd, double right_rows, const JoinPredicate &edge)
	Estimate the fan out for a left semijoin or a left antijoin. More...

double	FindOutputRowsForJoin (THD thd, double left_rows, double right_rows, const JoinPredicate edge)
	Estimate the number of output rows from joining two relations. More...

Variables
constexpr size_t	kMaxItemLengthEstimate = 4096
	When we make cost estimates, we use this as the maximal length the values we get from evaluating an Item (in bytes). More...

constexpr double	kApplyOneFilterCost = 0.1

constexpr double	kAggregateOneRowCost = 0.1

constexpr double	kSortOneRowCost = 0.1

constexpr double	kHashBuildOneRowCost = 0.1

constexpr double	kHashProbeOneRowCost = 0.1

constexpr double	kHashReturnOneRowCost = 0.07

constexpr double	kMaterializeOneRowCost = 0.1

constexpr double	kWindowOneRowCost = 0.1

constexpr ha_rows	kRowEstimateFallback = 1000
	A fallback cardinality estimate that is used in case the storage engine cannot provide one (like for table functions). More...

Function Documentation

◆ AddCost()

void AddCost	(	THD *	thd,
		const ContainedSubquery &	subquery,
		double	num_rows,
		FilterCost *	cost
	)

Used internally by EstimateFilterCost() only.

◆ EstimateAggregateCost()

void EstimateAggregateCost	(	THD *	thd,
		AccessPath *	path,
		const Query_block *	query_block
	)

Estimate costs and result row count for an aggregate operation.

Parameters

[in,out]	thd	The current thread.
[in,out]	path	The AGGREGATE path.
[in]	query_block	The Query_block to which 'path' belongs.

◆ EstimateCostForRefAccess()

double EstimateCostForRefAccess	(	THD *	thd,
		TABLE *	table,
		unsigned	key_idx,
		double	num_output_rows
	)

◆ EstimateDeleteRowsCost()

void EstimateDeleteRowsCost ( AccessPath * path )

◆ EstimateDistinctRows()

double EstimateDistinctRows	(	THD *	thd,
		double	child_rows,
		Bounds_checked_array< const Item *const >	terms
	)

Estimate the number of rows with a distinct combination of values for 'terms'.

See also: EstimateDistinctRowsFromStatistics for additional details.

Parameters

thd	The current thread.
child_rows	The number of input rows.
terms	The terms for which we estimate the number of unique combinations.

Returns: The estimated number of output rows.

◆ EstimateFilterCost() [1/2]

FilterCost EstimateFilterCost	(	THD *	thd,
		double	num_rows,
		const Mem_root_array< ContainedSubquery > &	contained_subqueries
	)

inline

A cheaper overload of EstimateFilterCost() that assumes that all contained subqueries have already been extracted (ie., it skips the walking, which can be fairly expensive).

This data is typically computed by FindContainedSubqueries().

◆ EstimateFilterCost() [2/2]

FilterCost EstimateFilterCost	(	THD *	thd,
		double	num_rows,
		Item *	condition,
		const Query_block *	outer_query_block
	)

Estimate the cost of evaluating “condition”, “num_rows” times.

This is a fairly rudimentary estimation, but it includes the cost of any subqueries that may be present and that need evaluation.

◆ EstimateLimitOffsetCost()

void EstimateLimitOffsetCost ( AccessPath * path )

Estimate the costs and row count for a WINDOW AccessPath.

As described in

See also: AccessPath::m_init_cost, the cost to read k out of N rows would be init_cost + (k/N) * (cost - init_cost).

◆ EstimateMaterializeCost()

void EstimateMaterializeCost	(	THD *	thd,
		AccessPath *	path
	)

◆ EstimateSemijoinFanOut()

double EstimateSemijoinFanOut	(	THD *	thd,
		double	right_rows,
		const JoinPredicate &	edge
	)

Estimate the fan out for a left semijoin or a left antijoin.

The fan out is defined as the number of result rows, divided by the number of input rows from the left hand relation. For a semijoin, J1:

SELECT ... FROM t1 WHERE EXISTS (SELECT ... FROM t2 WHERE predicate)

we know that the fan out of the corresponding inner join J2:

SELECT ... FROM t1, t2 WHERE predicate

is: F(J2) = CARD(t2) * SELECTIVITY(predicate) , where CARD(t2)=right_rows, and SELECTIVITY(predicate)=edge.selectivity. If 'predicate' is a deterministic function of t1 and t2 rows, then J1 is equivalent to an inner join J3:

SELECT ... FROM t1 JOIN (SELECT DISTINCT f1,..fn FROM t2) d ON predicate

where f1,..fn are those fields from t2 that appear in the predicate.

Then F(J1) = F(J3) = F(J2) * CARD(d) / CARD(t2) = CARD(d) * SELECTIVITY(predicate).

This function therefore collects f1..fn and estimates CARD(d). As a special case, 'predicate' may be independent of t2. The query is then equivalent to:

SELECT ... FROM t1 WHERE predicate AND (SELECT COUNT(*) FROM t2) > 0

The fan out is then the selectivity of 'predicate' multiplied by the probability of t2 having at least one row.

Parameters

thd	The current thread.
right_rows	The number of input rows from the right hand relation.
edge	Join edge.

Returns: fan out.

◆ EstimateSortCost()

void EstimateSortCost	(	THD *	thd,
		AccessPath *	path,
		double	distinct_rows = `kUnknownRowCount`
	)

Estimate costs and output rows for a SORT AccessPath.

Parameters

thd	Current thread.
path	the AccessPath.
distinct_rows	An estimate of the number of distinct rows, if remove_duplicates==true and we have an estimate already.

◆ EstimateStreamCost()

void EstimateStreamCost ( AccessPath * path )

Estimate the costs and row count for a STREAM AccessPath.

◆ EstimateUpdateRowsCost()

void EstimateUpdateRowsCost ( AccessPath * path )

◆ EstimateWindowCost()

void EstimateWindowCost ( AccessPath * path )

Estimate the costs and row count for a WINDOW AccessPath.

◆ FindOutputRowsForJoin()

double FindOutputRowsForJoin	(	THD *	thd,
		double	left_rows,
		double	right_rows,
		const JoinPredicate *	edge
	)

inline

Estimate the number of output rows from joining two relations.

Parameters

thd	The current thread.
left_rows	Number of rows in the left hand relation.
right_rows	Number of rows in the right hand relation.
edge	The join between the two relations.

Variable Documentation

◆ kAggregateOneRowCost

constexpr double kAggregateOneRowCost = 0.1

constexpr

◆ kApplyOneFilterCost

constexpr double kApplyOneFilterCost = 0.1

constexpr

◆ kHashBuildOneRowCost

constexpr double kHashBuildOneRowCost = 0.1

constexpr

◆ kHashProbeOneRowCost

constexpr double kHashProbeOneRowCost = 0.1

constexpr

◆ kHashReturnOneRowCost

constexpr double kHashReturnOneRowCost = 0.07

constexpr

◆ kMaterializeOneRowCost

constexpr double kMaterializeOneRowCost = 0.1

constexpr

◆ kMaxItemLengthEstimate

constexpr size_t kMaxItemLengthEstimate = 4096

constexpr

When we make cost estimates, we use this as the maximal length the values we get from evaluating an Item (in bytes).

Actual values of e.g. blobs may be much longer, but even so we use this as an upper limit when doing cost calculations. (For context,

See also: Item::max_length .)

◆ kRowEstimateFallback

constexpr ha_rows kRowEstimateFallback = 1000

constexpr

A fallback cardinality estimate that is used in case the storage engine cannot provide one (like for table functions).

It's a fairly arbitrary non-zero value.

◆ kSortOneRowCost

constexpr double kSortOneRowCost = 0.1

constexpr

◆ kWindowOneRowCost

constexpr double kWindowOneRowCost = 0.1

constexpr

Classes

Functions

Variables

Function Documentation

◆ AddCost()

◆ EstimateAggregateCost()

◆ EstimateCostForRefAccess()

◆ EstimateDeleteRowsCost()

◆ EstimateDistinctRows()

◆ EstimateFilterCost() [1/2]

◆ EstimateFilterCost() [2/2]

◆ EstimateLimitOffsetCost()

◆ EstimateMaterializeCost()

◆ EstimateSemijoinFanOut()

◆ EstimateSortCost()

◆ EstimateStreamCost()

◆ EstimateUpdateRowsCost()

◆ EstimateWindowCost()

◆ FindOutputRowsForJoin()

Variable Documentation

◆ kAggregateOneRowCost

◆ kApplyOneFilterCost

◆ kHashBuildOneRowCost

◆ kHashProbeOneRowCost

◆ kHashReturnOneRowCost

◆ kMaterializeOneRowCost

◆ kMaxItemLengthEstimate

◆ kRowEstimateFallback

◆ kSortOneRowCost

◆ kWindowOneRowCost