MySQL 8.0.29
Source Code Documentation
cost_model.h
Go to the documentation of this file.
1/* Copyright (c) 2020, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23#ifndef SQL_JOIN_OPTIMIZER_COST_MODEL_H_
24#define SQL_JOIN_OPTIMIZER_COST_MODEL_H_
25
26#include "my_base.h"
29#include "sql/mem_root_array.h"
30
31struct AccessPath;
33class Item;
34class Query_block;
35class THD;
36struct TABLE;
37
38// These are extremely arbitrary cost model constants. We should revise them
39// based on actual query times (possibly using linear regression?), and then
40// put them into the cost model to make them user-tunable.
41constexpr double kApplyOneFilterCost = 0.1;
42constexpr double kAggregateOneRowCost = 0.1;
43constexpr double kSortOneRowCost = 0.1;
44constexpr double kHashBuildOneRowCost = 0.1;
45constexpr double kHashProbeOneRowCost = 0.1;
46constexpr double kHashReturnOneRowCost = 0.07;
47constexpr double kMaterializeOneRowCost = 0.1;
48constexpr double kWindowOneRowCost = 0.1;
49
50/// See EstimateFilterCost.
51struct FilterCost {
52 // Cost of evaluating the filter if nothing in particular is done with it.
54
55 // Cost of evaluating the filter if all subqueries in it have been
56 // materialized beforehand. If there are no subqueries in the condition,
57 // equals cost_if_not_materialized.
59
60 // Cost of materializing all subqueries present in the filter.
61 // If there are no subqueries in the condition, equals zero.
63};
64
65/// Used internally by EstimateFilterCost() only.
66void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows,
67 FilterCost *cost);
68
69/**
70 Estimate the cost of evaluating “condition”, “num_rows” times.
71 This is a fairly rudimentary estimation, _but_ it includes the cost
72 of any subqueries that may be present and that need evaluation.
73 */
74FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition,
75 Query_block *outer_query_block);
76
77/**
78 A cheaper overload of EstimateFilterCost() that assumes that all
79 contained subqueries have already been extracted (ie., it skips the
80 walking, which can be fairly expensive). This data is typically
81 computed by FindContainedSubqueries().
82 */
84 THD *thd, double num_rows,
85 const Mem_root_array<ContainedSubquery> &contained_subqueries) {
86 FilterCost cost{0.0, 0.0, 0.0};
88 cost.cost_if_materialized = num_rows * kApplyOneFilterCost;
89
90 for (const ContainedSubquery &subquery : contained_subqueries) {
91 AddCost(thd, subquery, num_rows, &cost);
92 }
93 return cost;
94}
95
96double EstimateCostForRefAccess(THD *thd, TABLE *table, unsigned key_idx,
97 double num_output_rows);
100void EstimateAggregateCost(AccessPath *path, const Query_block *query_block);
102
103inline double FindOutputRowsForJoin(double left_rows, double right_rows,
104 const JoinPredicate *edge) {
105 double fanout = right_rows * edge->selectivity;
107 // For outer joins, every outer row produces at least one row (if none
108 // are matching, we get a NULL-complemented row).
109 // Note that this can cause inconsistent row counts; see bug #33550360
110 // and/or JoinHypergraph::has_reordered_left_joins.
111 fanout = std::max(fanout, 1.0);
112 } else if (edge->expr->type == RelationalExpression::SEMIJOIN) {
113 // Semi- and antijoin estimation is pretty tricky, since we want isn't
114 // really selectivity; we want the probability that at least one row
115 // is matching, which is something else entirely. However, given that
116 // we only have selectivity to work with, we don't really have anything
117 // better than to estimate it as a normal join and cap the result
118 // at selectivity 1.0 (ie., each outer row generates at most one inner row).
119 fanout = std::min(fanout, 1.0);
120 } else if (edge->expr->type == RelationalExpression::ANTIJOIN) {
121 // Antijoin are estimated as simply the opposite of semijoin (see above),
122 // but wrongly estimating 0 rows (or, of course, a negative amount) could be
123 // really bad, so we assume at least 10% coming out as a fudge factor.
124 // It's better to estimate too high than too low here.
125 fanout = std::max(1.0 - fanout, 0.1);
126 }
127 return left_rows * fanout;
128}
129
130#endif // SQL_JOIN_OPTIMIZER_COST_MODEL_H_
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:802
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:421
This class represents a query block, aka a query specification, which is a query consisting of a SELE...
Definition: sql_lex.h:1124
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_class.h:945
constexpr double kHashReturnOneRowCost
Definition: cost_model.h:46
double FindOutputRowsForJoin(double left_rows, double right_rows, const JoinPredicate *edge)
Definition: cost_model.h:103
void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows, FilterCost *cost)
Used internally by EstimateFilterCost() only.
Definition: cost_model.cc:117
void EstimateAggregateCost(AccessPath *path, const Query_block *query_block)
Definition: cost_model.cc:211
constexpr double kHashBuildOneRowCost
Definition: cost_model.h:44
constexpr double kWindowOneRowCost
Definition: cost_model.h:48
void EstimateSortCost(AccessPath *path, ha_rows limit_rows=HA_POS_ERROR)
Definition: cost_model.cc:87
FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition, Query_block *outer_query_block)
Estimate the cost of evaluating “condition”, “num_rows” times.
Definition: cost_model.cc:143
constexpr double kApplyOneFilterCost
Definition: cost_model.h:41
void EstimateDeleteRowsCost(AccessPath *path)
Definition: cost_model.cc:226
constexpr double kMaterializeOneRowCost
Definition: cost_model.h:47
double EstimateCostForRefAccess(THD *thd, TABLE *table, unsigned key_idx, double num_output_rows)
Definition: cost_model.cc:54
void EstimateMaterializeCost(THD *thd, AccessPath *path)
Definition: cost_model.cc:158
constexpr double kHashProbeOneRowCost
Definition: cost_model.h:45
constexpr double kSortOneRowCost
Definition: cost_model.h:43
constexpr double kAggregateOneRowCost
Definition: cost_model.h:42
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1139
#define HA_POS_ERROR
Definition: my_base.h:1141
static char * path
Definition: mysqldump.cc:130
Access paths are a query planning structure that correspond 1:1 to iterators, in that an access path ...
Definition: access_path.h:188
Definition: relational_expression.h:38
See EstimateFilterCost.
Definition: cost_model.h:51
double cost_to_materialize
Definition: cost_model.h:62
double cost_if_materialized
Definition: cost_model.h:58
double cost_if_not_materialized
Definition: cost_model.h:53
A specification that two specific relational expressions (e.g., two tables, or a table and a join bet...
Definition: access_path.h:74
RelationalExpression * expr
Definition: access_path.h:75
double selectivity
Definition: access_path.h:76
enum RelationalExpression::Type type
@ SEMIJOIN
Definition: relational_expression.h:96
@ ANTIJOIN
Definition: relational_expression.h:97
@ LEFT_JOIN
Definition: relational_expression.h:95
Definition: table.h:1394