MySQL 8.4.0
Source Code Documentation
sql_planner.h
Go to the documentation of this file.
1#ifndef SQL_PLANNER_INCLUDED
2#define SQL_PLANNER_INCLUDED
3
4/* Copyright (c) 2000, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file sql/sql_planner.h
29 Join planner classes.
30*/
31
32#include <sys/types.h>
33
34#include "my_inttypes.h"
35#include "my_table_map.h"
36#include "sql_optimizer.h"
37
39class JOIN;
40class JOIN_TAB;
41class Key_use;
43class THD;
44struct TABLE;
45class Table_ref;
46struct POSITION;
47
49
50/**
51 Find the lateral dependencies of 'tab'.
52*/
54 return (tab.table_ref != nullptr && tab.table_ref->is_derived())
56 : 0;
57}
58
59/**
60 This class determines the optimal join order for tables within
61 a basic query block, ie a query specification clause, possibly extended
62 with semi-joined tables from embedded subqueries.
63
64 This class takes as prerequisite a join class where all dependencies among
65 tables have been sorted out, all possible access paths have been
66 sorted out, and all statistics information has been filled in.
67
68 The class has a sole public function that will calculate the most
69 optimal plan based on the inputs and the environment, such as prune level
70 and greedy optimizer search depth. For more information, see the
71 function headers for the private functions greedy_search(),
72 best_extension_by_limited_search() and eq_ref_extension_by_limited_search().
73*/
74
76 public:
77 Optimize_table_order(THD *thd_arg, JOIN *join_arg, Table_ref *sjm_nest_arg);
79 /**
80 Entry point to table join order optimization.
81 For further description, see class header and private function headers.
82
83 @return false if successful, true if error
84 */
85 bool choose_table_order();
86
87 void recalculate_lateral_deps(uint first_tab_no);
88
89 void recalculate_lateral_deps_incrementally(uint first_tab_no);
90
91 private:
92 THD *const thd; // Pointer to current THD
93 JOIN *const join; // Pointer to the current plan being developed
94 const uint search_depth; // Maximum search depth to apply in greedy search
95 const uint prune_level; // pruning heuristics to be applied
96 // (0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
97 /**
98 Bitmap of all join nests embedding the last table appended to the current
99 partial join.
100 */
102 /**
103 If non-NULL, we are optimizing a materialized semi-join nest.
104 If NULL, we are optimizing a complete join plan.
105 */
107 /**
108 When calculating a plan for a materialized semi-join nest,
109 best_access_path() needs to know not only the remaining tables within the
110 semi-join nest, but also all tables outside of this nest, because there may
111 be key references between the semi-join nest and the outside tables
112 that should not be considered when materializing the semi-join nest.
113 @c excluded_tables tracks these tables.
114 */
116 /*
117 No need to call advance_sj_state() when
118 1) there are no semijoin nests or
119 2) we are optimizing a materialized semijoin nest.
120 */
121 const bool has_sj;
122
123 /**
124 If true, find_best_ref() must go through all keys, no shortcutting
125 allowed.
126 */
128
129 /// True if we found a complete plan using only allowed semijoin strategies.
131
132 /**
133 False/true at start/end of choose_table_order().
134 Helps member functions know if current plan is in join->positions or
135 join->best_positions.
136 */
138
139 /// Set true when we have decided to return with a "good enough" plan.
140 bool use_best_so_far{false};
141
142 inline Key_use *find_best_ref(const JOIN_TAB *tab,
143 const table_map remaining_tables,
144 const uint idx, const double prefix_rowcount,
145 bool *found_condition,
146 table_map *ref_depends_map,
147 uint *used_key_parts);
148 double calculate_scan_cost(const JOIN_TAB *tab, const uint idx,
149 const Key_use *best_ref,
150 const double prefix_rowcount,
151 const bool found_condition,
152 const bool disable_jbuf,
153 double *rows_after_filtering,
154 Opt_trace_object *trace_access_scan);
155 void best_access_path(JOIN_TAB *tab, const table_map remaining_tables,
156 const uint idx, bool disable_jbuf,
157 const double prefix_rowcount, POSITION *pos);
159 const JOIN_TAB *s, table_map remaining_tables, uint idx,
160 double prefix_rowcount, POSITION *loose_scan_pos);
161 bool check_interleaving_with_nj(JOIN_TAB *next_tab);
162 void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab,
163 uint idx);
164 void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab);
165 void optimize_straight_join(table_map join_tables);
166 bool greedy_search(table_map remaining_tables);
167 bool best_extension_by_limited_search(table_map remaining_tables, uint idx,
168 uint current_search_depth);
170 uint idx,
171 uint current_search_depth);
172 bool consider_plan(uint idx, Opt_trace_object *trace_obj);
174 bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab,
175 table_map remaining_tables,
176 bool loosescan,
177 double *newcount,
178 double *newcost);
179 void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab,
180 table_map remaining_tables,
181 Table_ref *sjm_nest, double *newcount,
182 double *newcost);
183 void semijoin_mat_lookup_access_paths(uint last_inner, Table_ref *sjm_nest,
184 double *newcount, double *newcost);
185 void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab,
186 double *newcount, double *newcost);
187
188 double lateral_derived_cost(const JOIN_TAB *tab, const uint idx,
189 const double prefix_rowcount,
190 const Cost_model_server *cost_model);
191
193 bool plan_has_duplicate_tabs() const;
194 static uint determine_search_depth(uint search_depth, uint table_count);
195};
196
197void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg,
198 double *rowcount_arg);
199
200/**
201 Calculate 'Post read filtering' effect of JOIN::conds for table
202 'tab'. Only conditions that are not directly involved in the chosen
203 access method shall be included in the calculation of this 'Post
204 read filtering' effect.
205
206 The function first identifies fields that are directly used by the
207 access method. This includes columns used by range and ref access types,
208 and predicates on the identified columns (if any) will not be taken into
209 account when the filtering effect is calculated.
210
211 The function will then calculate the filtering effect of any predicate
212 that applies to 'tab' and is not depending on the columns used by the
213 access method. The source of information with highest accuracy is
214 always preferred and is as follows:
215 1) Row estimates from the range optimizer
216 2) Row estimates from index statistics (records per key)
217 3) Guesstimates
218
219 Thus, after identifying columns that are used by the access method,
220 the function will look for rows estimates made by the range optimizer.
221 If found, the estimates from the range optimizer are calculated into
222 the filtering effect.
223
224 The function then goes through JOIN::conds to get estimates from any
225 remaining predicate that applies to 'tab' and does not depend on any
226 tables that come later in the join sequence. Predicates that depend on
227 columns that are either used by the access method or used in the row
228 estimate from the range optimizer will not be considered in this phase.
229
230 @param tab The table condition filtering effect is calculated
231 for
232 @param keyuse Describes the 'ref' access method (if any) that is
233 chosen
234 @param used_tables Tables earlier in the join sequence than 'tab'
235 @param fanout The number of rows read by the chosen access
236 method for each row combination of previous tables
237 @param is_join_buffering Whether or not condition filtering is about
238 to be calculated for an access method using join
239 buffering.
240 @param write_to_trace Whether we should print the filtering effect calculated
241 by histogram statistics and the final aggregated filtering
242 effect to optimizer trace.
243 @param parent_trace The parent trace object where the final aggregated
244 filtering effect will be printed if "write_to_trace" is
245 set to true.
246
247 @return the 'post read filtering' effect (between 0 and 1) of
248 JOIN::conds
249*/
250float calculate_condition_filter(const JOIN_TAB *const tab,
251 const Key_use *const keyuse,
252 table_map used_tables, double fanout,
253 bool is_join_buffering, bool write_to_trace,
254 Opt_trace_object &parent_trace);
255
256/**
257 Find the cost for a ref lookup on the given index, assumed to return
258 “num_rows” rows. The cost will be capped by “worst_seeks”
259 (see find_worst_seeks()).
260 */
261double find_cost_for_ref(const THD *thd, TABLE *table, unsigned keyno,
262 double num_rows, double worst_seeks);
263
265 public:
266 /**
267 "Less than" comparison function object used to compare two JOIN_TAB
268 objects based on a number of factors in this order:
269
270 - table before another table that depends on it (straight join,
271 outer join etc), then
272 - table before another table that depends on it to use a key
273 as access method, then
274 - table with smallest number of records first, then
275 - the table with lowest-value pointer (i.e., the one located
276 in the lowest memory address) first.
277
278 @param jt1 first JOIN_TAB object
279 @param jt2 second JOIN_TAB object
280
281 @note The order relation implemented by Join_tab_compare_default is not
282 transitive, i.e. it is possible to choose a, b and c such that
283 (a @< b) && (b @< c) but (c @< a). This is the case in the
284 following example:
285
286 a: dependent = @<none@> found_records = 3
287 b: dependent = @<none@> found_records = 4
288 c: dependent = b found_records = 2
289
290 a @< b: because a has fewer records
291 b @< c: because c depends on b (e.g outer join dependency)
292 c @< a: because c has fewer records
293
294 This implies that the result of a sort using the relation
295 implemented by Join_tab_compare_default () depends on the order in
296 which elements are compared, i.e. the result is
297 implementation-specific.
298
299 @return
300 true if jt1 is smaller than jt2, false otherwise
301 */
302 bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const;
303};
304
305#endif /* SQL_PLANNER_INCLUDED */
API for getting cost estimates for server operations that are not directly related to a table object.
Definition: opt_costmodel.h:54
Query optimization plan node.
Definition: sql_select.h:602
Table_ref * table_ref
points to table reference
Definition: sql_select.h:634
Definition: sql_optimizer.h:133
Definition: sql_planner.h:264
A Key_use represents an equality predicate of the form (table.column = val), where the column is inde...
Definition: sql_select.h:177
A JSON object (unordered set of key/value pairs).
Definition: opt_trace.h:802
This class determines the optimal join order for tables within a basic query block,...
Definition: sql_planner.h:75
JOIN *const join
Definition: sql_planner.h:93
bool got_final_plan
False/true at start/end of choose_table_order().
Definition: sql_planner.h:137
bool test_all_ref_keys
If true, find_best_ref() must go through all keys, no shortcutting allowed.
Definition: sql_planner.h:127
const bool has_sj
Definition: sql_planner.h:121
const Table_ref *const emb_sjm_nest
If non-NULL, we are optimizing a materialized semi-join nest.
Definition: sql_planner.h:106
nested_join_map cur_embedding_map
Bitmap of all join nests embedding the last table appended to the current partial join.
Definition: sql_planner.h:101
~Optimize_table_order()=default
bool found_plan_with_allowed_sj
True if we found a complete plan using only allowed semijoin strategies.
Definition: sql_planner.h:130
bool use_best_so_far
Set true when we have decided to return with a "good enough" plan.
Definition: sql_planner.h:140
THD *const thd
Definition: sql_planner.h:92
const uint prune_level
Definition: sql_planner.h:95
const uint search_depth
Definition: sql_planner.h:94
const table_map excluded_tables
When calculating a plan for a materialized semi-join nest, best_access_path() needs to know not only ...
Definition: sql_planner.h:115
table_map m_lateral_deps
If 'this' is body of lateral derived table: map of tables in the same FROM clause as this derived tab...
Definition: sql_lex.h:841
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Definition: table.h:2863
bool is_derived() const
Return true if this represents a derived table (an unnamed view)
Definition: table.h:3128
Query_expression * derived_query_expression() const
Return the query expression of a derived table or view.
Definition: table.h:3337
void semijoin_mat_lookup_access_paths(uint last_inner, Table_ref *sjm_nest, double *newcount, double *newcost)
Find best access paths for semi-join MaterializeLookup strategy.
Definition: sql_planner.cc:3904
void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab, double *newcount, double *newcost)
Find best access paths for semi-join DuplicateWeedout strategy and calculate rowcount and cost based ...
Definition: sql_planner.cc:3951
static uint determine_search_depth(uint search_depth, uint table_count)
Heuristic procedure to automatically guess a reasonable degree of exhaustiveness for the greedy searc...
Definition: sql_planner.cc:2079
bool greedy_search(table_map remaining_tables)
Find a good, possibly optimal, query execution plan (QEP) by a greedy search.
Definition: sql_planner.cc:2329
void recalculate_lateral_deps_incrementally(uint first_tab_no)
Update join->deps_of_remaining_lateral_derived_tables after adding JOIN_TAB first_tab_no-1 to the pla...
Definition: sql_planner.cc:4751
table_map calculate_lateral_deps_of_final_plan(uint tab_no) const
Calculate the lateral dependencies of the suffix of JOIN_TABs from tab_no to join->tables-1 in the fi...
Definition: sql_planner.cc:4695
double find_cost_for_ref(const THD *thd, TABLE *table, unsigned keyno, double num_rows, double worst_seeks)
Find the cost for a ref lookup on the given index, assumed to return “num_rows” rows.
Definition: sql_planner.cc:144
bool check_interleaving_with_nj(JOIN_TAB *next_tab)
Check interleaving with an inner tables of an outer join for extension table.
Definition: sql_planner.cc:3591
void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab, table_map remaining_tables, Table_ref *sjm_nest, double *newcount, double *newcost)
Find best access paths for semi-join MaterializeScan strategy and calculate rowcount and cost based o...
Definition: sql_planner.cc:3821
void recalculate_lateral_deps(uint first_tab_no)
Set join->deps_of_remaining_lateral_derived_tables to the set of lateral dependencies of the tables i...
Definition: sql_planner.cc:4719
void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab, uint idx)
Do semi-join optimization step after we've added a new tab to join prefix.
Definition: sql_planner.cc:4109
bool best_extension_by_limited_search(table_map remaining_tables, uint idx, uint current_search_depth)
Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search.
Definition: sql_planner.cc:2720
void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg, double *rowcount_arg)
Calculate a cost of given partial join order.
Definition: sql_planner.cc:2450
float calculate_condition_filter(const JOIN_TAB *const tab, const Key_use *const keyuse, table_map used_tables, double fanout, bool is_join_buffering, bool write_to_trace, Opt_trace_object &parent_trace)
Calculate 'Post read filtering' effect of JOIN::conds for table 'tab'.
Definition: sql_planner.cc:1245
table_map eq_ref_extension_by_limited_search(table_map remaining_tables, uint idx, uint current_search_depth)
Heuristic utility used by best_extension_by_limited_search().
Definition: sql_planner.cc:3067
void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab)
Nested joins perspective: Remove the last table from the join order.
Definition: sql_planner.cc:4658
double lateral_derived_cost(const JOIN_TAB *tab, const uint idx, const double prefix_rowcount, const Cost_model_server *cost_model)
If table is a lateral derived table, calculates the "cost of materialization", which is the cost of a...
Definition: sql_planner.cc:917
bool fix_semijoin_strategies()
Fix semi-join strategies for the picked join order.
Definition: sql_planner.cc:3362
bool semijoin_loosescan_fill_driving_table_position(const JOIN_TAB *s, table_map remaining_tables, uint idx, double prefix_rowcount, POSITION *loose_scan_pos)
Fills a POSITION object of the driving table of a semi-join LooseScan range, with the cheapest access...
Definition: sql_planner.cc:1611
Optimize_table_order(THD *thd_arg, JOIN *join_arg, Table_ref *sjm_nest_arg)
Definition: sql_planner.cc:126
bool plan_has_duplicate_tabs() const
Check if any Table_ref appears twice in the plan (which is an error).
Definition: sql_planner.cc:4786
bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab, table_map remaining_tables, bool loosescan, double *newcount, double *newcost)
Find best access paths for semi-join FirstMatch or LooseScan strategy and calculate rowcount and cost...
Definition: sql_planner.cc:3666
bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const
"Less than" comparison function object used to compare two JOIN_TAB objects based on a number of fact...
Definition: sql_planner.cc:1856
bool consider_plan(uint idx, Opt_trace_object *trace_obj)
Cost calculation of another (partial-)QEP has been completed.
Definition: sql_planner.cc:2482
double calculate_scan_cost(const JOIN_TAB *tab, const uint idx, const Key_use *best_ref, const double prefix_rowcount, const bool found_condition, const bool disable_jbuf, double *rows_after_filtering, Opt_trace_object *trace_access_scan)
Calculate the cost of range/table/index scanning table 'tab'.
Definition: sql_planner.cc:771
Key_use * find_best_ref(const JOIN_TAB *tab, const table_map remaining_tables, const uint idx, const double prefix_rowcount, bool *found_condition, table_map *ref_depends_map, uint *used_key_parts)
Find the best index to do 'ref' access on for a table.
Definition: sql_planner.cc:208
void optimize_straight_join(table_map join_tables)
Select the best ways to access the tables in a query without reordering them.
Definition: sql_planner.cc:2119
void best_access_path(JOIN_TAB *tab, const table_map remaining_tables, const uint idx, bool disable_jbuf, const double prefix_rowcount, POSITION *pos)
Find the best access path for an extension of a partial execution plan and add this path to the plan.
Definition: sql_planner.cc:983
bool choose_table_order()
Entry point to table join order optimization.
Definition: sql_planner.cc:1952
Some integer typedefs for easier portability.
unsigned long long int ulonglong
Definition: my_inttypes.h:56
uint64_t table_map
Definition: my_table_map.h:30
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
std::string join(Container cont, const std::string &delim)
join elements of an container into a string separated by a delimiter.
Definition: string.h:151
ulonglong nested_join_map
Definition: nested_join.h:37
Classes used for query optimizations.
table_map get_lateral_deps(const JOIN_TAB &tab)
Find the lateral dependencies of 'tab'.
Definition: sql_planner.h:53
ulonglong nested_join_map
Definition: sql_planner.h:46
A position of table within a join order.
Definition: sql_select.h:355
Definition: table.h:1405