24#ifndef SQL_JOIN_OPTIMIZER_COST_MODEL_H_
25#define SQL_JOIN_OPTIMIZER_COST_MODEL_H_
135 THD *thd,
double num_rows,
142 AddCost(thd, subquery, num_rows, &cost);
317 if (
table->s->is_missing_primary_key())
return false;
318 return key_idx ==
table->s->primary_key &&
319 table->file->primary_key_is_clustered();
400 constexpr unsigned kMinEstimatedBlockSize = 4096;
401 constexpr unsigned kMaxEstimatedBlockSize = 65536;
402 return std::clamp(
table->file->stats.block_size, kMinEstimatedBlockSize,
403 kMaxEstimatedBlockSize);
438 int64_t max_bytes{0};
440 for (uint i = 0; i <
table->s->fields; i++) {
442 max_bytes +=
table->field[i]->max_data_length();
447 return {.record_bytes =
451 .overflow_probability = 0.0};
476 table->key_info[key_idx].key_length +
table->file->ref_length;
499 ?
table->bytes_per_row()->record_bytes
505 double records_per_page =
506 std::max(1.0,
static_cast<double>(block_size) / bytes_per_row);
517 double r = 1.0 + records_per_page;
518 while (r < table->
file->stats.records) {
519 r =
r * (1.0 + records_per_page);
556inline double RowReadCost(
double num_rows,
double fields_read_per_row,
557 double bytes_per_row) {
576 num_rows, fields_read_per_row,
599 constexpr double kDefaultFieldsReadFromCoveringIndex = 2;
600 double fields_read_per_row =
table->covering_keys.is_set(key_idx)
602 : kDefaultFieldsReadFromCoveringIndex;
605 return RowReadCost(num_rows, fields_read_per_row, bytes_per_row) +
672 assert(key_idx < table->s->keys);
675 return 0.5 * (cost_with_ahi + cost_without_ahi);
707 double num_output_rows);
720 1.0,
table->file->stats.records);
733 double num_output_rows) {
738 constexpr double kRefAccessCostDiscount = 0.05;
739 return (1.0 - kRefAccessCostDiscount) *
741 1.0, num_output_rows);
constexpr double kUnknownRowCount
To indicate that a row estimate is not yet made.
Definition: access_path.h:196
This class represents the cost of a hash join (excluding the cost of sub-paths).
Definition: cost_model.h:765
double m_init_cost
The cost of preparing to produce the first result row.
Definition: cost_model.h:781
double spill_to_disk_probability() const
Definition: cost_model.h:769
double init_cost() const
Definition: cost_model.h:773
double m_spill_to_disk_probability
The probability (range [0.0, 1.0]) of needing spill to disk.
Definition: cost_model.h:779
HashJoinCost(THD *thd, const HashJoinMetrics &metrics)
Definition: cost_model.cc:1804
double cost() const
Definition: cost_model.h:775
double m_cost
The cost of the hash join.
Definition: cost_model.h:783
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:928
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:432
This class represents a query block, aka a query specification, which is a query consisting of a SELE...
Definition: sql_lex.h:1179
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Hypergraph optimizer cost constants.
constexpr double kReadOneFieldCost
Cost of per field in the read set.
Definition: cost_constants.h:85
constexpr double kApplyOneFilterCost
Cost of evaluating one filter on one row.
Definition: cost_constants.h:117
constexpr double kReadOneRowCost
Fixed cost of reading a row from the storage engine into the record buffer.
Definition: cost_constants.h:81
constexpr double kIndexLookupPageCost
The cost per page that is visited when performing an index lookup in an InnoDB B-tree.
Definition: cost_constants.h:129
constexpr double kIndexLookupFixedCost
Fixed cost of an index lookup when AHI is enabled (default).
Definition: cost_constants.h:132
constexpr double kReadOneByteCost
Overhead per byte when reading a row.
Definition: cost_constants.h:95
double RowReadCost(double num_rows, double fields_read_per_row, double bytes_per_row)
Computes the expected cost of reading a number of rows.
Definition: cost_model.h:556
constexpr ha_rows kRowEstimateFallback
A fallback cardinality estimate that is used in case the storage engine cannot provide one (like for ...
Definition: cost_model.h:56
double EstimateRefAccessCost(const TABLE *table, unsigned key_idx, double num_output_rows)
Estimates the cost of an index lookup (ref access).
Definition: cost_model.h:732
double TableAccessIOCost(const TABLE *table, double num_rows, BytesPerTableRow row_size)
Calculate the IO-cost of reading 'num_rows' rows from 'table'.
Definition: cost_model.cc:471
void EstimateSortCost(THD *thd, AccessPath *path, double distinct_rows=kUnknownRowCount)
Estimate costs and output rows for a SORT AccessPath.
Definition: cost_model.cc:658
void EstimateTemptableAggregateCost(THD *thd, AccessPath *path, const Query_block *query_block)
Estimate the costs and row count for a Temp table Aggregate AccessPath.
Definition: cost_model.cc:1710
RangeScanType
Type of range scan operation.
Definition: cost_model.h:679
@ kSingleRange
Plain range scan, without the MRR optimization.
@ kMultiRange
Using the MRR optimization.
BytesPerTableRow EstimateBytesPerRowTable(const TABLE *table)
Estimates the number of bytes that MySQL must process when reading a row from a table,...
Definition: cost_model.h:437
void EstimateWindowCost(AccessPath *path)
Estimate the costs and row count for a WINDOW AccessPath.
Definition: cost_model.cc:1754
bool IsClusteredPrimaryKey(const TABLE *table, unsigned key_idx)
Determines whether a given key on a table is both clustered and primary.
Definition: cost_model.h:316
double CoveringIndexAccessIOCost(const TABLE *table, unsigned key_idx, double num_rows)
Calculate the IO-cost of doing a lookup on index 'key_idx' on 'table' and then read 'num_rows' rows.
Definition: cost_model.cc:526
double EstimateGroupSkipScanCost(TABLE *table, uint key_idx, uint num_groups, bool has_max)
Estimate costs for a group skip scan operation.
Definition: cost_model.cc:1612
double EstimateTableScanCost(const TABLE *table)
Estimates the cost of a full table scan.
Definition: cost_model.h:617
constexpr size_t kMaxItemLengthEstimate
When we make cost estimates, we use this as the maximal length the values we get from evaluating an I...
Definition: cost_model.h:51
void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows, FilterCost *cost)
Used internally by EstimateFilterCost() only.
Definition: cost_model.cc:715
double EstimateIndexScanCost(const TABLE *table, unsigned key_idx)
Estimates the cost of an index scan.
Definition: cost_model.h:718
void EstimateLimitOffsetCost(AccessPath *path)
Estimate the costs and row count for a WINDOW AccessPath.
Definition: cost_model.cc:1677
int64_t GetReadSetWidth(const TABLE *table)
Get an estimate of the row size of the read set of 'table'.
Definition: cost_model.cc:395
double RowReadCostIndex(const TABLE *table, unsigned key_idx, double num_rows)
Computes the cost of reading a number of rows from an index.
Definition: cost_model.h:591
std::span< const Item *const > TermArray
Array of aggregation terms.
Definition: cost_model.h:160
double EstimateSkipScanCost(TABLE *table, uint key_idx, uint num_subrange_scans, ha_rows records)
Estimate costs and result row count for a skip scan operation.
Definition: cost_model.cc:1603
void EstimateDeleteRowsCost(AccessPath *path)
Definition: cost_model.cc:1620
constexpr unsigned kMinEstimatedBytesPerRow
The minimum number of bytes to return for row length estimates.
Definition: cost_model.h:325
int IndexHeight(const TABLE *table, unsigned key_idx)
Estimates the height of a B-tree index.
Definition: cost_model.h:496
double EstimateIndexRangeScanCost(const TABLE *table, unsigned key_idx, RangeScanType scan_type, double num_ranges, double num_output_rows)
Estimates the cost of an index range scan.
Definition: cost_model.cc:571
double FindOutputRowsForJoin(THD *thd, double left_rows, double right_rows, const JoinPredicate *edge)
Estimate the number of output rows from joining two relations.
Definition: cost_model.h:269
void EstimateUpdateRowsCost(AccessPath *path)
Definition: cost_model.cc:1637
constexpr double kBlockFillFactor
This is the estimated fraction of an (innodb) block that is in use (i.e.
Definition: cost_model.h:91
unsigned ClampedBlockSize(const TABLE *table)
We clamp the block size to lie in the interval between the max and min allowed block size for InnoDB ...
Definition: cost_model.h:399
constexpr double kIOStartCost
We model the IO cost for InnoDB tables with the DYNAMIC row format.
Definition: cost_model.h:84
double EstimateDistinctRows(THD *thd, double child_rows, TermArray terms)
Estimate the number of rows with a distinct combination of values for 'terms'.
Definition: cost_model.cc:1552
void EstimateMaterializeCost(THD *thd, AccessPath *path)
Provide row estimates and costs for a MATERIALIZE AccessPath.
Definition: cost_model.cc:880
double IndexLookupCost(const TABLE *table, unsigned key_idx)
Estimates the cost of an index lookup.
Definition: cost_model.h:671
void EstimateStreamCost(THD *thd, AccessPath *path)
Estimate the costs and row count for a STREAM AccessPath.
Definition: cost_model.cc:1654
void EstimateAggregateCost(THD *thd, AccessPath *path, const Query_block *query_block)
Estimate costs and result row count for an aggregate operation.
Definition: cost_model.cc:1581
unsigned EstimateBytesPerRowIndex(const TABLE *table, unsigned key_idx)
Estimates the number of bytes that MySQL must process when reading a row from a secondary index,...
Definition: cost_model.h:468
BytesPerTableRow EstimateBytesPerRowWideTable(const TABLE *table)
Estimate the average number of bytes that we need to read from the storage engine when reading a row ...
Definition: cost_model.cc:408
FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition, const Query_block *outer_query_block)
Estimate the cost of evaluating “condition”, “num_rows” times.
Definition: cost_model.cc:756
constexpr unsigned kMaxEstimatedBytesPerRow
The maximum number of bytes to return for row length estimates.
Definition: cost_model.h:335
double EstimateSemijoinFanOut(THD *thd, double right_rows, const JoinPredicate &edge)
Estimate the fan out for a left semijoin or a left antijoin.
Definition: cost_model.cc:1764
double RowReadCostTable(const TABLE *table, double num_rows)
Computes the cost of reading a number of rows from a table.
Definition: cost_model.h:572
constexpr double kIOByteCost
The additional cost of reading an extra byte from disk.
Definition: cost_model.h:87
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1217
uint bitmap_bits_set(const MY_BITMAP *map)
Definition: my_bitmap.cc:416
static char * path
Definition: mysqldump.cc:150
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
ValueType max(X &&first)
Definition: gtid.h:103
static PSI_metric_info_v1 metrics[]
Definition: plugin.cc:83
T clamp(U x)
Definition: ut0ut.h:412
const mysql_service_registry_t * r
Definition: pfs_example_plugin_employee.cc:86
Access paths are a query planning structure that correspond 1:1 to iterators, in that an access path ...
Definition: access_path.h:238
This struct represents the number of bytes we expect to read for a table row.
Definition: cost_model.h:347
int64_t overflow_bytes
The number of bytes read from overflow pages.
Definition: cost_model.h:359
double overflow_probability
Definition: cost_model.h:366
int64_t record_bytes
The number of bytes read from the B-tree record.
Definition: cost_model.h:352
This class represents a subquery contained in some subclass of Item_subselect,.
Definition: item.h:860
See EstimateFilterCost.
Definition: cost_model.h:94
double init_cost_if_not_materialized
Initial cost before the filter can be applied for the first time.
Definition: cost_model.h:104
double cost_to_materialize
Cost of materializing all subqueries present in the filter.
Definition: cost_model.h:113
double cost_if_materialized
Cost of evaluating the filter for all rows if all subqueries in it have been materialized beforehand.
Definition: cost_model.h:109
double cost_if_not_materialized
Cost of evaluating the filter for all rows if subqueries are not materialized.
Definition: cost_model.h:98
Input to HashJoinCost, for calculating the cost of a hash join.
Definition: cost_model.h:747
double probe_row_size
The average size of rows in the 'probe' input (in bytes).
Definition: cost_model.h:757
double build_rows
The number of rows in the 'build' input.
Definition: cost_model.h:749
double key_size
The size of the join key, in bytes.
Definition: cost_model.h:753
double build_row_size
The average size of rows in the 'build' input (in bytes).
Definition: cost_model.h:751
double probe_rows
The number of rows in the 'probe' input.
Definition: cost_model.h:755
double result_rows
The number of rows in the result set.
Definition: cost_model.h:759
A specification that two specific relational expressions (e.g., two tables, or a table and a join bet...
Definition: access_path.h:80
RelationalExpression * expr
Definition: access_path.h:81
double selectivity
Definition: access_path.h:82
enum RelationalExpression::Type type
@ SEMIJOIN
Left semijoin.
Definition: relational_expression.h:161
@ MULTI_INNER_JOIN
Definition: relational_expression.h:183
@ STRAIGHT_INNER_JOIN
Definition: relational_expression.h:169
@ ANTIJOIN
Left antijoin.
Definition: relational_expression.h:164
@ INNER_JOIN
Definition: relational_expression.h:157
@ FULL_OUTER_JOIN
Definition: relational_expression.h:176
@ TABLE
Definition: relational_expression.h:185
@ LEFT_JOIN
Definition: relational_expression.h:158