MySQL 8.0.32
Source Code Documentation
cost_model.h
Go to the documentation of this file.
1/* Copyright (c) 2020, 2022, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23#ifndef SQL_JOIN_OPTIMIZER_COST_MODEL_H_
24#define SQL_JOIN_OPTIMIZER_COST_MODEL_H_
25
26#include "my_base.h"
29#include "sql/mem_root_array.h"
30
31struct AccessPath;
33class Item;
34class Query_block;
35class THD;
36struct TABLE;
37
38/**
39 When we make cost estimates, we use this as the maximal length the
40 values we get from evaluating an Item (in bytes). Actual values of
41 e.g. blobs may be much longer, but even so we use this as an upper
42 limit when doing cost calculations. (For context, @see Item#max_length .)
43*/
44constexpr size_t kMaxItemLengthEstimate = 4096;
45
46// These are extremely arbitrary cost model constants. We should revise them
47// based on actual query times (possibly using linear regression?), and then
48// put them into the cost model to make them user-tunable.
49constexpr double kApplyOneFilterCost = 0.1;
50constexpr double kAggregateOneRowCost = 0.1;
51constexpr double kSortOneRowCost = 0.1;
52constexpr double kHashBuildOneRowCost = 0.1;
53constexpr double kHashProbeOneRowCost = 0.1;
54constexpr double kHashReturnOneRowCost = 0.07;
55constexpr double kMaterializeOneRowCost = 0.1;
56constexpr double kWindowOneRowCost = 0.1;
57
58/// See EstimateFilterCost.
59struct FilterCost {
60 /// Cost of evaluating the filter for all rows if subqueries are not
61 /// materialized. (Note that this includes the contribution from
62 /// init_cost_if_not_materialized.)
64
65 /// Initial cost before the filter can be applied for the first time.
66 /// Typically the cost of executing 'independent subquery' in queries like:
67 /// "SELECT * FROM tab WHERE field = <independent subquery>".
68 /// (That corresponds to the Item_singlerow_subselect class.)
70
71 /// Cost of evaluating the filter for all rows if all subqueries in
72 /// it have been materialized beforehand. If there are no subqueries
73 /// in the condition, equals cost_if_not_materialized.
75
76 /// Cost of materializing all subqueries present in the filter.
77 /// If there are no subqueries in the condition, equals zero.
79};
80
81/// Used internally by EstimateFilterCost() only.
82void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows,
83 FilterCost *cost);
84
85/**
86 Estimate the cost of evaluating “condition”, “num_rows” times.
87 This is a fairly rudimentary estimation, _but_ it includes the cost
88 of any subqueries that may be present and that need evaluation.
89 */
90FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition,
91 const Query_block *outer_query_block);
92
93/**
94 A cheaper overload of EstimateFilterCost() that assumes that all
95 contained subqueries have already been extracted (ie., it skips the
96 walking, which can be fairly expensive). This data is typically
97 computed by FindContainedSubqueries().
98 */
100 THD *thd, double num_rows,
101 const Mem_root_array<ContainedSubquery> &contained_subqueries) {
102 FilterCost cost;
105
106 for (const ContainedSubquery &subquery : contained_subqueries) {
107 AddCost(thd, subquery, num_rows, &cost);
108 }
109 return cost;
110}
111
112double EstimateCostForRefAccess(THD *thd, TABLE *table, unsigned key_idx,
113 double num_output_rows);
116
117/**
118 Estimate costs and result row count for an aggregate operation.
119 @param[in,out] path The AGGREGATE path.
120 @param[in] query_block The Query_block to which 'path' belongs.
121 @param[in,out] trace Optimizer trace text.
122 */
124 std::string *trace = nullptr);
127
128/// Estimate the costs and row count for a STREAM AccessPath.
130
131/// Estimate the costs and row count for a LIMIT_OFFSET AccessPath.
133
134inline double FindOutputRowsForJoin(double left_rows, double right_rows,
135 const JoinPredicate *edge) {
136 double fanout = right_rows * edge->selectivity;
138 // For outer joins, every outer row produces at least one row (if none
139 // are matching, we get a NULL-complemented row).
140 // Note that this can cause inconsistent row counts; see bug #33550360
141 // and/or JoinHypergraph::has_reordered_left_joins.
142 fanout = std::max(fanout, 1.0);
143 } else if (edge->expr->type == RelationalExpression::SEMIJOIN) {
144 // Semi- and antijoin estimation is pretty tricky, since we want isn't
145 // really selectivity; we want the probability that at least one row
146 // is matching, which is something else entirely. However, given that
147 // we only have selectivity to work with, we don't really have anything
148 // better than to estimate it as a normal join and cap the result
149 // at selectivity 1.0 (ie., each outer row generates at most one inner row).
150 // Note that this can cause inconsistent row counts; see bug #33550360 and
151 // CostingReceiver::has_semijoin_with_possibly_clamped_child.
152 fanout = std::min(fanout, 1.0);
153 } else if (edge->expr->type == RelationalExpression::ANTIJOIN) {
154 // Antijoin are estimated as simply the opposite of semijoin (see above),
155 // but wrongly estimating 0 rows (or, of course, a negative amount) could be
156 // really bad, so we assume at least 10% coming out as a fudge factor.
157 // It's better to estimate too high than too low here.
158 fanout = std::max(1.0 - fanout, 0.1);
159 }
160 return left_rows * fanout;
161}
162
163#endif // SQL_JOIN_OPTIMIZER_COST_MODEL_H_
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:850
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:425
This class represents a query block, aka a query specification, which is a query consisting of a SELE...
Definition: sql_lex.h:1153
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:33
constexpr double kHashReturnOneRowCost
Definition: cost_model.h:54
double FindOutputRowsForJoin(double left_rows, double right_rows, const JoinPredicate *edge)
Definition: cost_model.h:134
constexpr size_t kMaxItemLengthEstimate
When we make cost estimates, we use this as the maximal length the values we get from evaluating an I...
Definition: cost_model.h:44
void EstimateAggregateCost(AccessPath *path, const Query_block *query_block, std::string *trace=nullptr)
Estimate costs and result row count for an aggregate operation.
void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows, FilterCost *cost)
Used internally by EstimateFilterCost() only.
Definition: cost_model.cc:122
constexpr double kHashBuildOneRowCost
Definition: cost_model.h:52
void EstimateLimitOffsetCost(AccessPath *path)
Estimate the costs and row count for a LIMIT_OFFSET AccessPath.
Definition: cost_model.cc:801
constexpr double kWindowOneRowCost
Definition: cost_model.h:56
constexpr double kApplyOneFilterCost
Definition: cost_model.h:49
void EstimateDeleteRowsCost(AccessPath *path)
Definition: cost_model.cc:751
constexpr double kMaterializeOneRowCost
Definition: cost_model.h:55
void EstimateStreamCost(AccessPath *path)
Estimate the costs and row count for a STREAM AccessPath.
Definition: cost_model.cc:785
double EstimateCostForRefAccess(THD *thd, TABLE *table, unsigned key_idx, double num_output_rows)
Definition: cost_model.cc:58
void EstimateUpdateRowsCost(AccessPath *path)
Definition: cost_model.cc:768
void EstimateMaterializeCost(THD *thd, AccessPath *path)
Definition: cost_model.cc:178
constexpr double kHashProbeOneRowCost
Definition: cost_model.h:53
constexpr double kSortOneRowCost
Definition: cost_model.h:51
constexpr double kAggregateOneRowCost
Definition: cost_model.h:50
FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition, const Query_block *outer_query_block)
Estimate the cost of evaluating “condition”, “num_rows” times.
Definition: cost_model.cc:163
void EstimateSortCost(AccessPath *path)
Definition: cost_model.cc:91
This file includes constants used by all storage engines.
static char * path
Definition: mysqldump.cc:133
Access paths are a query planning structure that correspond 1:1 to iterators, in that an access path ...
Definition: access_path.h:189
This class represents a subquery contained in some subclass of Item_subselect,.
Definition: item.h:782
See EstimateFilterCost.
Definition: cost_model.h:59
double init_cost_if_not_materialized
Initial cost before the filter can be applied for the first time.
Definition: cost_model.h:69
double cost_to_materialize
Cost of materializing all subqueries present in the filter.
Definition: cost_model.h:78
double cost_if_materialized
Cost of evaluating the filter for all rows if all subqueries in it have been materialized beforehand.
Definition: cost_model.h:74
double cost_if_not_materialized
Cost of evaluating the filter for all rows if subqueries are not materialized.
Definition: cost_model.h:63
A specification that two specific relational expressions (e.g., two tables, or a table and a join bet...
Definition: access_path.h:75
RelationalExpression * expr
Definition: access_path.h:76
double selectivity
Definition: access_path.h:77
enum RelationalExpression::Type type
@ SEMIJOIN
Definition: relational_expression.h:91
@ ANTIJOIN
Definition: relational_expression.h:92
@ LEFT_JOIN
Definition: relational_expression.h:90
Definition: table.h:1395