24#ifndef SQL_JOIN_OPTIMIZER_COST_MODEL_H_
25#define SQL_JOIN_OPTIMIZER_COST_MODEL_H_
135 THD *thd,
double num_rows,
142 AddCost(thd, subquery, num_rows, &cost);
294 if (
table->s->is_missing_primary_key())
return false;
295 return key_idx ==
table->s->primary_key &&
296 table->file->primary_key_is_clustered();
365 int64_t max_row_size);
374 constexpr unsigned kMinEstimatedBlockSize = 4096;
375 constexpr unsigned kMaxEstimatedBlockSize = 65536;
376 return std::clamp(
table->file->stats.block_size, kMinEstimatedBlockSize,
377 kMaxEstimatedBlockSize);
412 int64_t max_bytes{0};
414 for (uint i = 0; i <
table->s->fields; i++) {
416 max_bytes +=
table->field[i]->field_length;
421 return {.record_bytes =
425 .overflow_probability = 0.0};
450 table->key_info[key_idx].key_length +
table->file->ref_length;
473 ?
table->bytes_per_row()->record_bytes
479 double records_per_page =
480 std::max(1.0,
static_cast<double>(block_size) / bytes_per_row);
491 double r = 1.0 + records_per_page;
492 while (r < table->
file->stats.records) {
493 r =
r * (1.0 + records_per_page);
530inline double RowReadCost(
double num_rows,
double fields_read_per_row,
531 double bytes_per_row) {
550 num_rows, fields_read_per_row,
573 constexpr double kDefaultFieldsReadFromCoveringIndex = 2;
574 double fields_read_per_row =
table->covering_keys.is_set(key_idx)
576 : kDefaultFieldsReadFromCoveringIndex;
579 return RowReadCost(num_rows, fields_read_per_row, bytes_per_row) +
646 assert(key_idx < table->s->keys);
649 return 0.5 * (cost_with_ahi + cost_without_ahi);
681 double num_output_rows);
694 1.0,
table->file->stats.records);
707 double num_output_rows) {
712 constexpr double kRefAccessCostDiscount = 0.05;
713 return (1.0 - kRefAccessCostDiscount) *
715 1.0, num_output_rows);
constexpr double kUnknownRowCount
To indicate that a row estimate is not yet made.
Definition: access_path.h:196
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:927
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:432
This class represents a query block, aka a query specification, which is a query consisting of a SELE...
Definition: sql_lex.h:1179
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Hypergraph optimizer cost constants.
constexpr double kReadOneFieldCost
Cost of per field in the read set.
Definition: cost_constants.h:85
constexpr double kApplyOneFilterCost
Cost of evaluating one filter on one row.
Definition: cost_constants.h:117
constexpr double kReadOneRowCost
Fixed cost of reading a row from the storage engine into the record buffer.
Definition: cost_constants.h:81
constexpr double kIndexLookupPageCost
The cost per page that is visited when performing an index lookup in an InnoDB B-tree.
Definition: cost_constants.h:129
constexpr double kIndexLookupFixedCost
Fixed cost of an index lookup when AHI is enabled (default).
Definition: cost_constants.h:132
constexpr double kReadOneByteCost
Overhead per byte when reading a row.
Definition: cost_constants.h:95
double RowReadCost(double num_rows, double fields_read_per_row, double bytes_per_row)
Computes the expected cost of reading a number of rows.
Definition: cost_model.h:530
constexpr ha_rows kRowEstimateFallback
A fallback cardinality estimate that is used in case the storage engine cannot provide one (like for ...
Definition: cost_model.h:56
double EstimateRefAccessCost(const TABLE *table, unsigned key_idx, double num_output_rows)
Estimates the cost of an index lookup (ref access).
Definition: cost_model.h:706
double TableAccessIOCost(const TABLE *table, double num_rows, BytesPerTableRow row_size)
Calculate the IO-cost of reading 'num_rows' rows from 'table'.
Definition: cost_model.cc:418
void EstimateSortCost(THD *thd, AccessPath *path, double distinct_rows=kUnknownRowCount)
Estimate costs and output rows for a SORT AccessPath.
Definition: cost_model.cc:605
void EstimateTemptableAggregateCost(THD *thd, AccessPath *path, const Query_block *query_block)
Estimate the costs and row count for a Temp table Aggregate AccessPath.
Definition: cost_model.cc:1640
RangeScanType
Type of range scan operation.
Definition: cost_model.h:653
@ kSingleRange
Plain range scan, without the MRR optimization.
@ kMultiRange
Using the MRR optimization.
BytesPerTableRow EstimateBytesPerRowTable(const TABLE *table)
Estimates the number of bytes that MySQL must process when reading a row from a table,...
Definition: cost_model.h:411
void EstimateWindowCost(AccessPath *path)
Estimate the costs and row count for a WINDOW AccessPath.
Definition: cost_model.cc:1684
bool IsClusteredPrimaryKey(const TABLE *table, unsigned key_idx)
Determines whether a given key on a table is both clustered and primary.
Definition: cost_model.h:293
double CoveringIndexAccessIOCost(const TABLE *table, unsigned key_idx, double num_rows)
Calculate the IO-cost of doing a lookup on index 'key_idx' on 'table' and then read 'num_rows' rows.
Definition: cost_model.cc:473
double EstimateTableScanCost(const TABLE *table)
Estimates the cost of a full table scan.
Definition: cost_model.h:591
constexpr size_t kMaxItemLengthEstimate
When we make cost estimates, we use this as the maximal length the values we get from evaluating an I...
Definition: cost_model.h:51
void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows, FilterCost *cost)
Used internally by EstimateFilterCost() only.
Definition: cost_model.cc:662
double EstimateIndexScanCost(const TABLE *table, unsigned key_idx)
Estimates the cost of an index scan.
Definition: cost_model.h:692
void EstimateLimitOffsetCost(AccessPath *path)
Estimate the costs and row count for a WINDOW AccessPath.
Definition: cost_model.cc:1607
double RowReadCostIndex(const TABLE *table, unsigned key_idx, double num_rows)
Computes the cost of reading a number of rows from an index.
Definition: cost_model.h:565
std::span< const Item *const > TermArray
Array of aggregation terms.
Definition: cost_model.h:160
void EstimateDeleteRowsCost(AccessPath *path)
Definition: cost_model.cc:1550
constexpr unsigned kMinEstimatedBytesPerRow
The minimum number of bytes to return for row length estimates.
Definition: cost_model.h:302
int IndexHeight(const TABLE *table, unsigned key_idx)
Estimates the height of a B-tree index.
Definition: cost_model.h:470
double EstimateIndexRangeScanCost(const TABLE *table, unsigned key_idx, RangeScanType scan_type, double num_ranges, double num_output_rows)
Estimates the cost of an index range scan.
Definition: cost_model.cc:518
double FindOutputRowsForJoin(THD *thd, double left_rows, double right_rows, const JoinPredicate *edge)
Estimate the number of output rows from joining two relations.
Definition: cost_model.h:246
void EstimateUpdateRowsCost(AccessPath *path)
Definition: cost_model.cc:1567
constexpr double kBlockFillFactor
This is the estimated fraction of an (innodb) block that is in use (i.e.
Definition: cost_model.h:91
unsigned ClampedBlockSize(const TABLE *table)
We clamp the block size to lie in the interval between the max and min allowed block size for InnoDB ...
Definition: cost_model.h:373
constexpr double kIOStartCost
We model the IO cost for InnoDB tables with the DYNAMIC row format.
Definition: cost_model.h:84
double EstimateDistinctRows(THD *thd, double child_rows, TermArray terms)
Estimate the number of rows with a distinct combination of values for 'terms'.
Definition: cost_model.cc:1499
BytesPerTableRow EstimateBytesPerRowWideTable(const TABLE *table, int64_t max_row_size)
Estimate the average number of bytes that we need to read from the storage engine when reading a row ...
Definition: cost_model.cc:317
void EstimateMaterializeCost(THD *thd, AccessPath *path)
Provide row estimates and costs for a MATERIALIZE AccessPath.
Definition: cost_model.cc:827
double IndexLookupCost(const TABLE *table, unsigned key_idx)
Estimates the cost of an index lookup.
Definition: cost_model.h:645
void EstimateStreamCost(THD *thd, AccessPath *path)
Estimate the costs and row count for a STREAM AccessPath.
Definition: cost_model.cc:1584
void EstimateAggregateCost(THD *thd, AccessPath *path, const Query_block *query_block)
Estimate costs and result row count for an aggregate operation.
Definition: cost_model.cc:1528
unsigned EstimateBytesPerRowIndex(const TABLE *table, unsigned key_idx)
Estimates the number of bytes that MySQL must process when reading a row from a secondary index,...
Definition: cost_model.h:442
FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition, const Query_block *outer_query_block)
Estimate the cost of evaluating “condition”, “num_rows” times.
Definition: cost_model.cc:703
constexpr unsigned kMaxEstimatedBytesPerRow
The maximum number of bytes to return for row length estimates.
Definition: cost_model.h:312
double EstimateSemijoinFanOut(THD *thd, double right_rows, const JoinPredicate &edge)
Estimate the fan out for a left semijoin or a left antijoin.
Definition: cost_model.cc:1694
double RowReadCostTable(const TABLE *table, double num_rows)
Computes the cost of reading a number of rows from a table.
Definition: cost_model.h:546
constexpr double kIOByteCost
The additional cost of reading an extra byte from disk.
Definition: cost_model.h:87
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1217
uint bitmap_bits_set(const MY_BITMAP *map)
Definition: my_bitmap.cc:416
static char * path
Definition: mysqldump.cc:150
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
ValueType max(X &&first)
Definition: gtid.h:103
T clamp(U x)
Definition: ut0ut.h:412
const mysql_service_registry_t * r
Definition: pfs_example_plugin_employee.cc:86
Access paths are a query planning structure that correspond 1:1 to iterators, in that an access path ...
Definition: access_path.h:238
This struct represents the number of bytes we expect to read for a table row.
Definition: cost_model.h:324
int64_t overflow_bytes
The number of bytes read from overflow pages.
Definition: cost_model.h:336
double overflow_probability
Definition: cost_model.h:343
int64_t record_bytes
The number of bytes read from the B-tree record.
Definition: cost_model.h:329
This class represents a subquery contained in some subclass of Item_subselect,.
Definition: item.h:859
See EstimateFilterCost.
Definition: cost_model.h:94
double init_cost_if_not_materialized
Initial cost before the filter can be applied for the first time.
Definition: cost_model.h:104
double cost_to_materialize
Cost of materializing all subqueries present in the filter.
Definition: cost_model.h:113
double cost_if_materialized
Cost of evaluating the filter for all rows if all subqueries in it have been materialized beforehand.
Definition: cost_model.h:109
double cost_if_not_materialized
Cost of evaluating the filter for all rows if subqueries are not materialized.
Definition: cost_model.h:98
A specification that two specific relational expressions (e.g., two tables, or a table and a join bet...
Definition: access_path.h:80
RelationalExpression * expr
Definition: access_path.h:81
double selectivity
Definition: access_path.h:82
enum RelationalExpression::Type type
@ SEMIJOIN
Left semijoin.
Definition: relational_expression.h:153
@ MULTI_INNER_JOIN
Definition: relational_expression.h:175
@ STRAIGHT_INNER_JOIN
Definition: relational_expression.h:161
@ ANTIJOIN
Left antijoin.
Definition: relational_expression.h:156
@ INNER_JOIN
Definition: relational_expression.h:149
@ FULL_OUTER_JOIN
Definition: relational_expression.h:168
@ TABLE
Definition: relational_expression.h:177
@ LEFT_JOIN
Definition: relational_expression.h:150