24#ifndef SQL_JOIN_OPTIMIZER_COST_MODEL_H_
25#define SQL_JOIN_OPTIMIZER_COST_MODEL_H_
134 THD *thd,
double num_rows,
141 AddCost(thd, subquery, num_rows, &cost);
252 return left_rows * std::max(right_rows * edge->
selectivity, 1.0);
291 if (
table->s->is_missing_primary_key())
return false;
292 return key_idx ==
table->s->primary_key &&
293 table->file->primary_key_is_clustered();
362 int64_t max_row_size);
371 constexpr unsigned kMinEstimatedBlockSize = 4096;
372 constexpr unsigned kMaxEstimatedBlockSize = 65536;
373 return std::clamp(
table->file->stats.block_size, kMinEstimatedBlockSize,
374 kMaxEstimatedBlockSize);
409 int64_t max_bytes{0};
411 for (uint i = 0; i <
table->s->fields; i++) {
413 max_bytes +=
table->field[i]->field_length;
418 return {.record_bytes =
422 .overflow_probability = 0.0};
447 table->key_info[key_idx].key_length +
table->file->ref_length;
476 double records_per_page =
477 std::max(1.0,
static_cast<double>(block_size) / bytes_per_row);
488 double r = 1.0 + records_per_page;
489 while (r < table->
file->stats.records) {
490 r =
r * (1.0 + records_per_page);
527inline double RowReadCost(
double num_rows,
double fields_read_per_row,
528 double bytes_per_row) {
547 num_rows, fields_read_per_row,
570 constexpr double kDefaultFieldsReadFromCoveringIndex = 2;
571 double fields_read_per_row =
table->covering_keys.is_set(key_idx)
573 : kDefaultFieldsReadFromCoveringIndex;
576 return RowReadCost(num_rows, fields_read_per_row, bytes_per_row) +
643 assert(key_idx < table->s->keys);
646 return 0.5 * (cost_with_ahi + cost_without_ahi);
678 double num_output_rows);
691 1.0,
table->file->stats.records);
704 double num_output_rows) {
709 constexpr double kRefAccessCostDiscount = 0.05;
710 return (1.0 - kRefAccessCostDiscount) *
712 1.0, num_output_rows);
constexpr double kUnknownRowCount
To indicate that a row estimate is not yet made.
Definition: access_path.h:196
A wrapper class which provides array bounds checking.
Definition: sql_array.h:47
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:930
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:432
This class represents a query block, aka a query specification, which is a query consisting of a SELE...
Definition: sql_lex.h:1170
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Hypergraph optimizer cost constants.
constexpr double kReadOneFieldCost
Cost of per field in the read set.
Definition: cost_constants.h:85
constexpr double kApplyOneFilterCost
Cost of evaluating one filter on one row.
Definition: cost_constants.h:117
constexpr double kReadOneRowCost
Fixed cost of reading a row from the storage engine into the record buffer.
Definition: cost_constants.h:81
constexpr double kIndexLookupPageCost
The cost per page that is visited when performing an index lookup in an InnoDB B-tree.
Definition: cost_constants.h:129
constexpr double kIndexLookupFixedCost
Fixed cost of an index lookup when AHI is enabled (default).
Definition: cost_constants.h:132
constexpr double kReadOneByteCost
Overhead per byte when reading a row.
Definition: cost_constants.h:95
double RowReadCost(double num_rows, double fields_read_per_row, double bytes_per_row)
Computes the expected cost of reading a number of rows.
Definition: cost_model.h:527
constexpr ha_rows kRowEstimateFallback
A fallback cardinality estimate that is used in case the storage engine cannot provide one (like for ...
Definition: cost_model.h:55
double EstimateRefAccessCost(const TABLE *table, unsigned key_idx, double num_output_rows)
Estimates the cost of an index lookup (ref access).
Definition: cost_model.h:703
double TableAccessIOCost(const TABLE *table, double num_rows, BytesPerTableRow row_size)
Calculate the IO-cost of reading 'num_rows' rows from 'table'.
Definition: cost_model.cc:418
void EstimateSortCost(THD *thd, AccessPath *path, double distinct_rows=kUnknownRowCount)
Estimate costs and output rows for a SORT AccessPath.
Definition: cost_model.cc:605
void EstimateTemptableAggregateCost(THD *thd, AccessPath *path, const Query_block *query_block)
Estimate the costs and row count for a Temp table Aggregate AccessPath.
Definition: cost_model.cc:1635
RangeScanType
Type of range scan operation.
Definition: cost_model.h:650
@ kSingleRange
Plain range scan, without the MRR optimization.
@ kMultiRange
Using the MRR optimization.
BytesPerTableRow EstimateBytesPerRowTable(const TABLE *table)
Estimates the number of bytes that MySQL must process when reading a row from a table,...
Definition: cost_model.h:408
void EstimateWindowCost(AccessPath *path)
Estimate the costs and row count for a WINDOW AccessPath.
Definition: cost_model.cc:1679
bool IsClusteredPrimaryKey(const TABLE *table, unsigned key_idx)
Determines whether a given key on a table is both clustered and primary.
Definition: cost_model.h:290
double CoveringIndexAccessIOCost(const TABLE *table, unsigned key_idx, double num_rows)
Calculate the IO-cost of doing a lookup on index 'key_idx' on 'table' and then read 'num_rows' rows.
Definition: cost_model.cc:473
double EstimateTableScanCost(const TABLE *table)
Estimates the cost of a full table scan.
Definition: cost_model.h:588
constexpr size_t kMaxItemLengthEstimate
When we make cost estimates, we use this as the maximal length the values we get from evaluating an I...
Definition: cost_model.h:50
void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows, FilterCost *cost)
Used internally by EstimateFilterCost() only.
Definition: cost_model.cc:662
double EstimateIndexScanCost(const TABLE *table, unsigned key_idx)
Estimates the cost of an index scan.
Definition: cost_model.h:689
void EstimateLimitOffsetCost(AccessPath *path)
Estimate the costs and row count for a WINDOW AccessPath.
Definition: cost_model.cc:1601
double RowReadCostIndex(const TABLE *table, unsigned key_idx, double num_rows)
Computes the cost of reading a number of rows from an index.
Definition: cost_model.h:562
void EstimateDeleteRowsCost(AccessPath *path)
Definition: cost_model.cc:1544
constexpr unsigned kMinEstimatedBytesPerRow
The minimum number of bytes to return for row length estimates.
Definition: cost_model.h:299
int IndexHeight(const TABLE *table, unsigned key_idx)
Estimates the height of a B-tree index.
Definition: cost_model.h:467
double EstimateIndexRangeScanCost(const TABLE *table, unsigned key_idx, RangeScanType scan_type, double num_ranges, double num_output_rows)
Estimates the cost of an index range scan.
Definition: cost_model.cc:518
double FindOutputRowsForJoin(THD *thd, double left_rows, double right_rows, const JoinPredicate *edge)
Estimate the number of output rows from joining two relations.
Definition: cost_model.h:243
void EstimateUpdateRowsCost(AccessPath *path)
Definition: cost_model.cc:1561
constexpr double kBlockFillFactor
This is the estimated fraction of an (innodb) block that is in use (i.e.
Definition: cost_model.h:90
unsigned ClampedBlockSize(const TABLE *table)
We clamp the block size to lie in the interval between the max and min allowed block size for InnoDB ...
Definition: cost_model.h:370
constexpr double kIOStartCost
We model the IO cost for InnoDB tables with the DYNAMIC row format.
Definition: cost_model.h:83
BytesPerTableRow EstimateBytesPerRowWideTable(const TABLE *table, int64_t max_row_size)
Estimate the average number of bytes that we need to read from the storage engine when reading a row ...
Definition: cost_model.cc:317
void EstimateMaterializeCost(THD *thd, AccessPath *path)
Provide row estimates and costs for a MATERIALIZE AccessPath.
Definition: cost_model.cc:827
double IndexLookupCost(const TABLE *table, unsigned key_idx)
Estimates the cost of an index lookup.
Definition: cost_model.h:642
void EstimateStreamCost(THD *thd, AccessPath *path)
Estimate the costs and row count for a STREAM AccessPath.
Definition: cost_model.cc:1578
double EstimateDistinctRows(THD *thd, double child_rows, Bounds_checked_array< const Item *const > terms)
Estimate the number of rows with a distinct combination of values for 'terms'.
void EstimateAggregateCost(THD *thd, AccessPath *path, const Query_block *query_block)
Estimate costs and result row count for an aggregate operation.
Definition: cost_model.cc:1522
unsigned EstimateBytesPerRowIndex(const TABLE *table, unsigned key_idx)
Estimates the number of bytes that MySQL must process when reading a row from a secondary index,...
Definition: cost_model.h:439
FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition, const Query_block *outer_query_block)
Estimate the cost of evaluating “condition”, “num_rows” times.
Definition: cost_model.cc:703
constexpr unsigned kMaxEstimatedBytesPerRow
The maximum number of bytes to return for row length estimates.
Definition: cost_model.h:309
double EstimateSemijoinFanOut(THD *thd, double right_rows, const JoinPredicate &edge)
Estimate the fan out for a left semijoin or a left antijoin.
Definition: cost_model.cc:1689
double RowReadCostTable(const TABLE *table, double num_rows)
Computes the cost of reading a number of rows from a table.
Definition: cost_model.h:543
constexpr double kIOByteCost
The additional cost of reading an extra byte from disk.
Definition: cost_model.h:86
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1141
uint bitmap_bits_set(const MY_BITMAP *map)
Definition: my_bitmap.cc:416
static char * path
Definition: mysqldump.cc:149
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
T clamp(U x)
Definition: ut0ut.h:415
const mysql_service_registry_t * r
Definition: pfs_example_plugin_employee.cc:86
Access paths are a query planning structure that correspond 1:1 to iterators, in that an access path ...
Definition: access_path.h:238
This struct represents the number of bytes we expect to read for a table row.
Definition: cost_model.h:321
int64_t overflow_bytes
The number of bytes read from overflow pages.
Definition: cost_model.h:333
double overflow_probability
Definition: cost_model.h:340
int64_t record_bytes
The number of bytes read from the B-tree record.
Definition: cost_model.h:326
This class represents a subquery contained in some subclass of Item_subselect,.
Definition: item.h:862
See EstimateFilterCost.
Definition: cost_model.h:93
double init_cost_if_not_materialized
Initial cost before the filter can be applied for the first time.
Definition: cost_model.h:103
double cost_to_materialize
Cost of materializing all subqueries present in the filter.
Definition: cost_model.h:112
double cost_if_materialized
Cost of evaluating the filter for all rows if all subqueries in it have been materialized beforehand.
Definition: cost_model.h:108
double cost_if_not_materialized
Cost of evaluating the filter for all rows if subqueries are not materialized.
Definition: cost_model.h:97
A specification that two specific relational expressions (e.g., two tables, or a table and a join bet...
Definition: access_path.h:79
RelationalExpression * expr
Definition: access_path.h:80
double selectivity
Definition: access_path.h:81
enum RelationalExpression::Type type
@ SEMIJOIN
Left semijoin.
Definition: relational_expression.h:153
@ MULTI_INNER_JOIN
Definition: relational_expression.h:175
@ STRAIGHT_INNER_JOIN
Definition: relational_expression.h:161
@ ANTIJOIN
Left antijoin.
Definition: relational_expression.h:156
@ INNER_JOIN
Definition: relational_expression.h:149
@ FULL_OUTER_JOIN
Definition: relational_expression.h:168
@ TABLE
Definition: relational_expression.h:177
@ LEFT_JOIN
Definition: relational_expression.h:150