![]() |
MySQL 9.3.0
Source Code Documentation
|
#include "sql/join_optimizer/cost_model.h"
#include <math.h>
#include <stdio.h>
#include <algorithm>
#include <bit>
#include <cmath>
#include <iterator>
#include "mem_root_deque.h"
#include "my_base.h"
#include "my_bitmap.h"
#include "sql/handler.h"
#include "sql/histograms/histogram.h"
#include "sql/item_func.h"
#include "sql/item_subselect.h"
#include "sql/join_optimizer/access_path.h"
#include "sql/join_optimizer/bit_utils.h"
#include "sql/join_optimizer/find_contained_subqueries.h"
#include "sql/join_optimizer/join_optimizer.h"
#include "sql/join_optimizer/materialize_path_parameters.h"
#include "sql/join_optimizer/optimizer_trace.h"
#include "sql/join_optimizer/overflow_bitset.h"
#include "sql/join_optimizer/print_utils.h"
#include "sql/join_optimizer/relational_expression.h"
#include "sql/join_optimizer/secondary_statistics.h"
#include "sql/mem_root_array.h"
#include "sql/mysqld.h"
#include "sql/opt_costmodel.h"
#include "sql/opt_trace.h"
#include "sql/sql_class.h"
#include "sql/sql_const.h"
#include "sql/sql_lex.h"
#include "sql/sql_optimizer.h"
#include "sql/sql_planner.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "template_utils.h"
Classes | |
class | anonymous_namespace{cost_model.cc}::AggregateRowEstimator |
This class finds disjoint sets of aggregation terms that form prefixes of some non-hash index, and makes row estimates for those sets based on index metadata. More... | |
struct | anonymous_namespace{cost_model.cc}::AggregateRowEstimator::Prefix |
A prefix of some key where each key_part corresponds to an aggregation term. More... | |
Namespaces | |
namespace | anonymous_namespace{cost_model.cc} |
Typedefs | |
using | anonymous_namespace{cost_model.cc}::TermArray = Bounds_checked_array< const Item *const > |
Array of aggregation terms. More... | |
Functions | |
static double | AggregateCost (THD *thd, double output_rows, double input_rows, int agg_count, int group_by_field_count) |
Calculate the estimated cost of Streaming Aggregation, i.e. More... | |
static double | MaterializationWithDedupCost (bool use_old_model, double output_rows, double input_rows, int field_count) |
Calculate the estimated initialization cost of a MATERIALIZE Accesspath that involves deduplication. More... | |
static double | MaterializationCost (bool use_old_model, double output_rows, int field_count) |
Calculate the estimated cost of a MATERIALIZE Accesspath that does not involve deduplication. More... | |
static double | TempTableScanCost (THD *thd, AccessPath *table_path, double output_rows) |
Calculate the estimated cost of a Table scan Accesspath for a temporary table created for materialization. More... | |
static double | StreamCost (THD *thd, double output_rows, int field_count) |
Calculate the estimated cost of a STREAM Accesspath. More... | |
static double | AddInnodbEngineCostOverhead (double temptable_engine_cost, double temp_table_size, double output_rows, const mem_root_deque< Item * > &join_fields) |
Add InnoDB engine cost overhead into the in-memory table cost if the estimated temp table size exceeds tmp_table_size. More... | |
static double | TempTableAggregationCost (THD *thd, double output_rows, double input_rows, int agg_count, int group_by_fields, const mem_root_deque< Item * > &join_fields) |
Calculate the estimated initialization cost of a TEMPTABLE_AGGREGATE Accesspath This involves the cost for deduplicating input rows, inserting them into the temp table, and processing the aggregation functions. More... | |
BytesPerTableRow | EstimateBytesPerRowWideTable (const TABLE *table, int64_t max_row_size) |
Estimate the average number of bytes that we need to read from the storage engine when reading a row from 'table'. More... | |
static double | LowerCacheHitRatio (const handler *file, double file_size) |
Estimate a lower limit for the cache hit ratio when reading from an index (or base table), based on the size of the index relative to that of the buffer pool. More... | |
double | TableAccessIOCost (const TABLE *table, double num_rows, BytesPerTableRow row_size) |
Calculate the IO-cost of reading 'num_rows' rows from 'table'. More... | |
double | CoveringIndexAccessIOCost (const TABLE *table, unsigned key_idx, double num_rows) |
Calculate the IO-cost of doing a lookup on index 'key_idx' on 'table' and then read 'num_rows' rows. More... | |
double | EstimateIndexRangeScanCost (const TABLE *table, unsigned key_idx, RangeScanType scan_type, double num_ranges, double num_output_rows) |
Estimates the cost of an index range scan. More... | |
double | anonymous_namespace{cost_model.cc}::EstimateAggregateRows (THD *thd, const AccessPath *child, const Query_block *query_block, bool rollup) |
Estimate the row count for an aggregate operation (including ROLLUP rows for GROUP BY ... WITH ROLLUP). More... | |
void | EstimateSortCost (THD *thd, AccessPath *path, double distinct_rows) |
Estimate costs and output rows for a SORT AccessPath. More... | |
void | AddCost (THD *thd, const ContainedSubquery &subquery, double num_rows, FilterCost *cost) |
Used internally by EstimateFilterCost() only. More... | |
FilterCost | EstimateFilterCost (THD *thd, double num_rows, Item *condition, const Query_block *outer_query_block) |
Estimate the cost of evaluating “condition”, “num_rows” times. More... | |
static void | AddOperandCosts (const MaterializePathParameters::Operand &operand, double *subquery_cost, double *cost_for_cacheable) |
static void | SetDistinctGroupByOutputRowsAndSubqueryCosts (THD *thd, AccessPath *path, double *subquery_cost, double *cost_for_cacheable) |
static double | GetDistinctOrGroupByInitCost (bool use_old_cost_model, AccessPath *path) |
Return the cost for materialization used for DISTINCT or GROUP BY, which essentially involves deduplication cost. More... | |
static void | AccummulateOutputRowsAndSubqueryCosts (AccessPath *path, double *subquery_cost, double *cost_for_cacheable) |
void | EstimateMaterializeCost (THD *thd, AccessPath *path) |
Provide row estimates and costs for a MATERIALIZE AccessPath. More... | |
TermArray | anonymous_namespace{cost_model.cc}::GetAggregationTerms (const JOIN &join) |
double | anonymous_namespace{cost_model.cc}::EstimateDistinctRowsFromStatistics (THD *thd, TermArray terms, double child_rows) |
Estimate the number of distinct tuples in the projection defined by 'terms'. More... | |
template<typename FunctionLow , typename FunctionHigh > | |
double | anonymous_namespace{cost_model.cc}::SmoothTransition (FunctionLow function_low, FunctionHigh function_high, double lower_limit, double upper_limit, double argument) |
For a function f(x) such that: f(x) = g(x) for x<=l f(x) = h(x) for x>l. More... | |
double | anonymous_namespace{cost_model.cc}::EstimateRollupRowsPrimitively (double aggregate_rows, size_t grouping_expressions) |
Do a cheap rollup row estimate for small result sets. More... | |
double | anonymous_namespace{cost_model.cc}::EstimateRollupRowsAdvanced (THD *thd, double aggregate_rows, TermArray terms) |
Do more precise rollup row estimate for larger result sets. More... | |
double | EstimateDistinctRows (THD *thd, double child_rows, TermArray terms) |
void | EstimateAggregateCost (THD *thd, AccessPath *path, const Query_block *query_block) |
Estimate costs and result row count for an aggregate operation. More... | |
void | EstimateDeleteRowsCost (AccessPath *path) |
void | EstimateUpdateRowsCost (AccessPath *path) |
void | EstimateStreamCost (THD *thd, AccessPath *path) |
Estimate the costs and row count for a STREAM AccessPath. More... | |
void | EstimateLimitOffsetCost (AccessPath *path) |
Estimate the costs and row count for a WINDOW AccessPath. More... | |
void | EstimateTemptableAggregateCost (THD *thd, AccessPath *path, const Query_block *query_block) |
Estimate the costs and row count for a Temp table Aggregate AccessPath. More... | |
void | EstimateWindowCost (AccessPath *path) |
Estimate the costs and row count for a WINDOW AccessPath. More... | |
double | EstimateSemijoinFanOut (THD *thd, double right_rows, const JoinPredicate &edge) |
Estimate the fan out for a left semijoin or a left antijoin. More... | |
Variables | |
constexpr double | kTempTableCreationCost = 3 |
|
static |
void AddCost | ( | THD * | thd, |
const ContainedSubquery & | subquery, | ||
double | num_rows, | ||
FilterCost * | cost | ||
) |
Used internally by EstimateFilterCost() only.
|
static |
Add InnoDB engine cost overhead into the in-memory table cost if the estimated temp table size exceeds tmp_table_size.
temptable_engine_cost | Pre-calculated cost of in-memory table |
temp_table_size | 'tmp_table_size' system variable value. |
output_rows | Number of rows that the path outputs. |
join_fields | Reference to join fields. These are used to estimate the temp table row length. See get_tmp_table_rec_length() for details. |
|
static |
|
static |
Calculate the estimated cost of Streaming Aggregation, i.e.
AGGREGATE Accesspath.
thd | Current thread. |
output_rows | Number of rows that the path outputs. |
input_rows | Number of input rows to the path. |
agg_count | Number of aggregation functions present in the path. |
group_by_field_count | Number of GROUP BY columns used in the path. In the absence of GROUP BY clause, it can be 0. |
Note: This aggregation cost is independent of the cost of Temp table aggregation, and these two paths do not share any logic or cost constants.
double CoveringIndexAccessIOCost | ( | const TABLE * | table, |
unsigned | key_idx, | ||
double | num_rows | ||
) |
Calculate the IO-cost of doing a lookup on index 'key_idx' on 'table' and then read 'num_rows' rows.
void EstimateAggregateCost | ( | THD * | thd, |
AccessPath * | path, | ||
const Query_block * | query_block | ||
) |
Estimate costs and result row count for an aggregate operation.
[in,out] | thd | The current thread. |
[in,out] | path | The AGGREGATE path. |
[in] | query_block | The Query_block to which 'path' belongs. |
BytesPerTableRow EstimateBytesPerRowWideTable | ( | const TABLE * | table, |
int64_t | max_row_size | ||
) |
Estimate the average number of bytes that we need to read from the storage engine when reading a row from 'table'.
This is the size of the (b-tree) record and the overflow pages of any field that is part of the projection. This is similar to what EstimateBytesPerRowTable() does, but this function is intended to do a more accurate but also more expensive calculation for tables with potentially large rows (i.e. tables with BLOBs or large VARCHAR fields).
Note that this function tailored for InnoDB (and also for the DYNAMIC row format of InnoDB). At some point we may want to move this logic to the handler, so that we can customize it for other engines as well.
table | The target table |
max_row_size | The row size if all variable-sized fields are full. |
void EstimateDeleteRowsCost | ( | AccessPath * | path | ) |
double EstimateDistinctRows | ( | THD * | thd, |
double | child_rows, | ||
TermArray | terms | ||
) |
FilterCost EstimateFilterCost | ( | THD * | thd, |
double | num_rows, | ||
Item * | condition, | ||
const Query_block * | outer_query_block | ||
) |
Estimate the cost of evaluating “condition”, “num_rows” times.
This is a fairly rudimentary estimation, but it includes the cost of any subqueries that may be present and that need evaluation.
double EstimateIndexRangeScanCost | ( | const TABLE * | table, |
unsigned | key_idx, | ||
RangeScanType | scan_type, | ||
double | num_ranges, | ||
double | num_output_rows | ||
) |
Estimates the cost of an index range scan.
The cost model for index range scans accounts for the index lookup cost as well as the cost of reading rows. Both index scans and ref accesses can be viewed as special cases of index range scans, so the cost functions for those operations call this function under the hood.
table | The table to which the index belongs. |
key_idx | The position of the key in table->key_info[]. |
scan_type | Whether this an MRR or a regular index range scan. |
num_ranges | The number of ranges. |
num_output_rows | The estimated expected number of output rows. |
void EstimateLimitOffsetCost | ( | AccessPath * | path | ) |
Estimate the costs and row count for a WINDOW AccessPath.
As described in
void EstimateMaterializeCost | ( | THD * | thd, |
AccessPath * | path | ||
) |
Provide row estimates and costs for a MATERIALIZE AccessPath.
MATERIALIZE AccessPath is created by both old and new optimizer in many different contexts where temp table needs to be created, both with and without deduplication. E.g. materialization for a derived table, materializing deduplicated input rows for DISTINCT, GROUP BY clause without an aggregation functions, SET operations, etc
Note:
double EstimateSemijoinFanOut | ( | THD * | thd, |
double | right_rows, | ||
const JoinPredicate & | edge | ||
) |
Estimate the fan out for a left semijoin or a left antijoin.
The fan out is defined as the number of result rows, divided by the number of input rows from the left hand relation. For a semijoin, J1:
SELECT ... FROM t1 WHERE EXISTS (SELECT ... FROM t2 WHERE predicate)
we know that the fan out of the corresponding inner join J2:
SELECT ... FROM t1, t2 WHERE predicate
is: F(J2) = CARD(t2) * SELECTIVITY(predicate) , where CARD(t2)=right_rows, and SELECTIVITY(predicate)=edge.selectivity. If 'predicate' is a deterministic function of t1 and t2 rows, then J1 is equivalent to an inner join J3:
SELECT ... FROM t1 JOIN (SELECT DISTINCT f1,..fn FROM t2) d ON predicate
where f1,..fn are those fields from t2 that appear in the predicate.
Then F(J1) = F(J3) = F(J2) * CARD(d) / CARD(t2) = CARD(d) * SELECTIVITY(predicate).
This function therefore collects f1..fn and estimates CARD(d). As a special case, 'predicate' may be independent of t2. The query is then equivalent to:
SELECT ... FROM t1 WHERE predicate AND (SELECT COUNT(*) FROM t2) > 0
The fan out is then the selectivity of 'predicate' multiplied by the probability of t2 having at least one row.
thd | The current thread. |
right_rows | The number of input rows from the right hand relation. |
edge | Join edge. |
void EstimateSortCost | ( | THD * | thd, |
AccessPath * | path, | ||
double | distinct_rows = kUnknownRowCount |
||
) |
Estimate costs and output rows for a SORT AccessPath.
thd | Current thread. |
path | the AccessPath. |
distinct_rows | An estimate of the number of distinct rows, if remove_duplicates==true and we have an estimate already. |
void EstimateStreamCost | ( | THD * | thd, |
AccessPath * | path | ||
) |
Estimate the costs and row count for a STREAM AccessPath.
void EstimateTemptableAggregateCost | ( | THD * | thd, |
AccessPath * | path, | ||
const Query_block * | query_block | ||
) |
Estimate the costs and row count for a Temp table Aggregate AccessPath.
void EstimateUpdateRowsCost | ( | AccessPath * | path | ) |
void EstimateWindowCost | ( | AccessPath * | path | ) |
Estimate the costs and row count for a WINDOW AccessPath.
|
static |
Return the cost for materialization used for DISTINCT or GROUP BY, which essentially involves deduplication cost.
|
static |
Estimate a lower limit for the cache hit ratio when reading from an index (or base table), based on the size of the index relative to that of the buffer pool.
file | The handler to which the index or base table belongs. |
file_size | The size of the index or base table, in bytes. |
|
static |
Calculate the estimated cost of a MATERIALIZE Accesspath that does not involve deduplication.
use_old_model | If false, use the hypergraph cost model based on linear regression. If true, continue to use the earlier cost model. |
output_rows | Number of rows that the path outputs. |
field_count | Number of fields present in the materialized rows, which translates to the number of temp table columns. |
|
static |
Calculate the estimated initialization cost of a MATERIALIZE Accesspath that involves deduplication.
This involves the cost for deduplicating input rows and inserting them into the temp table.
use_old_model | If false, use the hypergraph cost model based on linear regression. If true, continue to use the earlier cost model. |
output_rows | Number of rows that the path outputs. |
input_rows | Number of input rows to the path. |
field_count | Number of GROUP BY or DISTINCT columns present. |
|
static |
|
static |
Calculate the estimated cost of a STREAM Accesspath.
thd | Current thread. |
output_rows | Number of rows that the path outputs, which is same as input rows. |
field_count | Number of fields present in the streamed rows, which typically translates to the number of JOIN fields. |
double TableAccessIOCost | ( | const TABLE * | table, |
double | num_rows, | ||
BytesPerTableRow | row_size | ||
) |
Calculate the IO-cost of reading 'num_rows' rows from 'table'.
|
static |
Calculate the estimated initialization cost of a TEMPTABLE_AGGREGATE Accesspath This involves the cost for deduplicating input rows, inserting them into the temp table, and processing the aggregation functions.
Cost estimation for this path was introduced only in hypergraph optimizer.
thd | Current thread. |
output_rows | Number of rows that the path outputs. |
input_rows | Number of input rows to the path. |
agg_count | Number of aggregation functions present in the path. |
group_by_fields | Number of GROUP BY columns present. |
join_fields | Reference to join fields. These are used to estimate the temp table row length. See get_tmp_table_rec_length() for details. |
|
static |
Calculate the estimated cost of a Table scan Accesspath for a temporary table created for materialization.
thd | Current thread. |
table_path | The TABLE_SCAN AccessPath of the temporary table. |
output_rows | Number of rows that the path outputs. |
|
constexpr |