MySQL 8.1.0
Source Code Documentation
sql_planner.h
Go to the documentation of this file.
1#ifndef SQL_PLANNER_INCLUDED
2#define SQL_PLANNER_INCLUDED
3
4/* Copyright (c) 2000, 2023, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 @file sql/sql_planner.h
28 Join planner classes.
29*/
30
31#include <sys/types.h>
32
33#include "my_inttypes.h"
34#include "my_table_map.h"
35#include "sql_optimizer.h"
36
38class JOIN;
39class JOIN_TAB;
40class Key_use;
42class THD;
43struct TABLE;
44class Table_ref;
45struct POSITION;
46
48
49/**
50 Find the lateral dependencies of 'tab'.
51*/
53 return (tab.table_ref != nullptr && tab.table_ref->is_derived())
55 : 0;
56}
57
58/**
59 This class determines the optimal join order for tables within
60 a basic query block, ie a query specification clause, possibly extended
61 with semi-joined tables from embedded subqueries.
62
63 This class takes as prerequisite a join class where all dependencies among
64 tables have been sorted out, all possible access paths have been
65 sorted out, and all statistics information has been filled in.
66
67 The class has a sole public function that will calculate the most
68 optimal plan based on the inputs and the environment, such as prune level
69 and greedy optimizer search depth. For more information, see the
70 function headers for the private functions greedy_search(),
71 best_extension_by_limited_search() and eq_ref_extension_by_limited_search().
72*/
73
75 public:
76 Optimize_table_order(THD *thd_arg, JOIN *join_arg, Table_ref *sjm_nest_arg);
78 /**
79 Entry point to table join order optimization.
80 For further description, see class header and private function headers.
81
82 @return false if successful, true if error
83 */
84 bool choose_table_order();
85
86 void recalculate_lateral_deps(uint first_tab_no);
87
88 void recalculate_lateral_deps_incrementally(uint first_tab_no);
89
90 private:
91 THD *const thd; // Pointer to current THD
92 JOIN *const join; // Pointer to the current plan being developed
93 const uint search_depth; // Maximum search depth to apply in greedy search
94 const uint prune_level; // pruning heuristics to be applied
95 // (0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
96 /**
97 Bitmap of all join nests embedding the last table appended to the current
98 partial join.
99 */
101 /**
102 If non-NULL, we are optimizing a materialized semi-join nest.
103 If NULL, we are optimizing a complete join plan.
104 */
106 /**
107 When calculating a plan for a materialized semi-join nest,
108 best_access_path() needs to know not only the remaining tables within the
109 semi-join nest, but also all tables outside of this nest, because there may
110 be key references between the semi-join nest and the outside tables
111 that should not be considered when materializing the semi-join nest.
112 @c excluded_tables tracks these tables.
113 */
115 /*
116 No need to call advance_sj_state() when
117 1) there are no semijoin nests or
118 2) we are optimizing a materialized semijoin nest.
119 */
120 const bool has_sj;
121
122 /**
123 If true, find_best_ref() must go through all keys, no shortcutting
124 allowed.
125 */
127
128 /// True if we found a complete plan using only allowed semijoin strategies.
130
131 /**
132 False/true at start/end of choose_table_order().
133 Helps member functions know if current plan is in join->positions or
134 join->best_positions.
135 */
137
138 /// Set true when we have decided to return with a "good enough" plan.
139 bool use_best_so_far{false};
140
141 inline Key_use *find_best_ref(const JOIN_TAB *tab,
142 const table_map remaining_tables,
143 const uint idx, const double prefix_rowcount,
144 bool *found_condition,
145 table_map *ref_depends_map,
146 uint *used_key_parts);
147 double calculate_scan_cost(const JOIN_TAB *tab, const uint idx,
148 const Key_use *best_ref,
149 const double prefix_rowcount,
150 const bool found_condition,
151 const bool disable_jbuf,
152 double *rows_after_filtering,
153 Opt_trace_object *trace_access_scan);
154 void best_access_path(JOIN_TAB *tab, const table_map remaining_tables,
155 const uint idx, bool disable_jbuf,
156 const double prefix_rowcount, POSITION *pos);
158 const JOIN_TAB *s, table_map remaining_tables, uint idx,
159 double prefix_rowcount, POSITION *loose_scan_pos);
160 bool check_interleaving_with_nj(JOIN_TAB *next_tab);
161 void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab,
162 uint idx);
163 void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab);
164 void optimize_straight_join(table_map join_tables);
165 bool greedy_search(table_map remaining_tables);
166 bool best_extension_by_limited_search(table_map remaining_tables, uint idx,
167 uint current_search_depth);
169 uint idx,
170 uint current_search_depth);
171 bool consider_plan(uint idx, Opt_trace_object *trace_obj);
173 bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab,
174 table_map remaining_tables,
175 bool loosescan,
176 double *newcount,
177 double *newcost);
178 void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab,
179 table_map remaining_tables,
180 Table_ref *sjm_nest, double *newcount,
181 double *newcost);
182 void semijoin_mat_lookup_access_paths(uint last_inner, Table_ref *sjm_nest,
183 double *newcount, double *newcost);
184 void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab,
185 double *newcount, double *newcost);
186
187 double lateral_derived_cost(const JOIN_TAB *tab, const uint idx,
188 const double prefix_rowcount,
189 const Cost_model_server *cost_model);
190
192 bool plan_has_duplicate_tabs() const;
193 static uint determine_search_depth(uint search_depth, uint table_count);
194};
195
196void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg,
197 double *rowcount_arg);
198
199/**
200 Calculate 'Post read filtering' effect of JOIN::conds for table
201 'tab'. Only conditions that are not directly involved in the chosen
202 access method shall be included in the calculation of this 'Post
203 read filtering' effect.
204
205 The function first identifies fields that are directly used by the
206 access method. This includes columns used by range and ref access types,
207 and predicates on the identified columns (if any) will not be taken into
208 account when the filtering effect is calculated.
209
210 The function will then calculate the filtering effect of any predicate
211 that applies to 'tab' and is not depending on the columns used by the
212 access method. The source of information with highest accuracy is
213 always preferred and is as follows:
214 1) Row estimates from the range optimizer
215 2) Row estimates from index statistics (records per key)
216 3) Guesstimates
217
218 Thus, after identifying columns that are used by the access method,
219 the function will look for rows estimates made by the range optimizer.
220 If found, the estimates from the range optimizer are calculated into
221 the filtering effect.
222
223 The function then goes through JOIN::conds to get estimates from any
224 remaining predicate that applies to 'tab' and does not depend on any
225 tables that come later in the join sequence. Predicates that depend on
226 columns that are either used by the access method or used in the row
227 estimate from the range optimizer will not be considered in this phase.
228
229 @param tab The table condition filtering effect is calculated
230 for
231 @param keyuse Describes the 'ref' access method (if any) that is
232 chosen
233 @param used_tables Tables earlier in the join sequence than 'tab'
234 @param fanout The number of rows read by the chosen access
235 method for each row combination of previous tables
236 @param is_join_buffering Whether or not condition filtering is about
237 to be calculated for an access method using join
238 buffering.
239 @param write_to_trace Whether we should print the filtering effect calculated
240 by histogram statistics and the final aggregated filtering
241 effect to optimizer trace.
242 @param parent_trace The parent trace object where the final aggregated
243 filtering effect will be printed if "write_to_trace" is
244 set to true.
245
246 @return the 'post read filtering' effect (between 0 and 1) of
247 JOIN::conds
248*/
249float calculate_condition_filter(const JOIN_TAB *const tab,
250 const Key_use *const keyuse,
251 table_map used_tables, double fanout,
252 bool is_join_buffering, bool write_to_trace,
253 Opt_trace_object &parent_trace);
254
255/**
256 Find the cost for a ref lookup on the given index, assumed to return
257 “num_rows” rows. The cost will be capped by “worst_seeks”
258 (see find_worst_seeks()).
259 */
260double find_cost_for_ref(const THD *thd, TABLE *table, unsigned keyno,
261 double num_rows, double worst_seeks);
262
264 public:
265 /**
266 "Less than" comparison function object used to compare two JOIN_TAB
267 objects based on a number of factors in this order:
268
269 - table before another table that depends on it (straight join,
270 outer join etc), then
271 - table before another table that depends on it to use a key
272 as access method, then
273 - table with smallest number of records first, then
274 - the table with lowest-value pointer (i.e., the one located
275 in the lowest memory address) first.
276
277 @param jt1 first JOIN_TAB object
278 @param jt2 second JOIN_TAB object
279
280 @note The order relation implemented by Join_tab_compare_default is not
281 transitive, i.e. it is possible to choose a, b and c such that
282 (a @< b) && (b @< c) but (c @< a). This is the case in the
283 following example:
284
285 a: dependent = @<none@> found_records = 3
286 b: dependent = @<none@> found_records = 4
287 c: dependent = b found_records = 2
288
289 a @< b: because a has fewer records
290 b @< c: because c depends on b (e.g outer join dependency)
291 c @< a: because c has fewer records
292
293 This implies that the result of a sort using the relation
294 implemented by Join_tab_compare_default () depends on the order in
295 which elements are compared, i.e. the result is
296 implementation-specific.
297
298 @return
299 true if jt1 is smaller than jt2, false otherwise
300 */
301 bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const;
302};
303
304#endif /* SQL_PLANNER_INCLUDED */
API for getting cost estimates for server operations that are not directly related to a table object.
Definition: opt_costmodel.h:51
Query optimization plan node.
Definition: sql_select.h:598
Table_ref * table_ref
points to table reference
Definition: sql_select.h:630
Definition: sql_optimizer.h:132
Definition: sql_planner.h:263
A Key_use represents an equality predicate of the form (table.column = val), where the column is inde...
Definition: sql_select.h:173
A JSON object (unordered set of key/value pairs).
Definition: opt_trace.h:798
This class determines the optimal join order for tables within a basic query block,...
Definition: sql_planner.h:74
JOIN *const join
Definition: sql_planner.h:92
bool got_final_plan
False/true at start/end of choose_table_order().
Definition: sql_planner.h:136
bool test_all_ref_keys
If true, find_best_ref() must go through all keys, no shortcutting allowed.
Definition: sql_planner.h:126
const bool has_sj
Definition: sql_planner.h:120
const Table_ref *const emb_sjm_nest
If non-NULL, we are optimizing a materialized semi-join nest.
Definition: sql_planner.h:105
nested_join_map cur_embedding_map
Bitmap of all join nests embedding the last table appended to the current partial join.
Definition: sql_planner.h:100
~Optimize_table_order()=default
bool found_plan_with_allowed_sj
True if we found a complete plan using only allowed semijoin strategies.
Definition: sql_planner.h:129
bool use_best_so_far
Set true when we have decided to return with a "good enough" plan.
Definition: sql_planner.h:139
THD *const thd
Definition: sql_planner.h:91
const uint prune_level
Definition: sql_planner.h:94
const uint search_depth
Definition: sql_planner.h:93
const table_map excluded_tables
When calculating a plan for a materialized semi-join nest, best_access_path() needs to know not only ...
Definition: sql_planner.h:114
table_map m_lateral_deps
If 'this' is body of lateral derived table: map of tables in the same FROM clause as this derived tab...
Definition: sql_lex.h:838
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:33
Definition: table.h:2800
bool is_derived() const
Return true if this represents a derived table (an unnamed view)
Definition: table.h:3065
Query_expression * derived_query_expression() const
Return the query expression of a derived table or view.
Definition: table.h:3274
void semijoin_mat_lookup_access_paths(uint last_inner, Table_ref *sjm_nest, double *newcount, double *newcost)
Find best access paths for semi-join MaterializeLookup strategy.
Definition: sql_planner.cc:3902
void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab, double *newcount, double *newcost)
Find best access paths for semi-join DuplicateWeedout strategy and calculate rowcount and cost based ...
Definition: sql_planner.cc:3949
static uint determine_search_depth(uint search_depth, uint table_count)
Heuristic procedure to automatically guess a reasonable degree of exhaustiveness for the greedy searc...
Definition: sql_planner.cc:2077
bool greedy_search(table_map remaining_tables)
Find a good, possibly optimal, query execution plan (QEP) by a greedy search.
Definition: sql_planner.cc:2327
void recalculate_lateral_deps_incrementally(uint first_tab_no)
Update join->deps_of_remaining_lateral_derived_tables after adding JOIN_TAB first_tab_no-1 to the pla...
Definition: sql_planner.cc:4749
table_map calculate_lateral_deps_of_final_plan(uint tab_no) const
Calculate the lateral dependencies of the suffix of JOIN_TABs from tab_no to join->tables-1 in the fi...
Definition: sql_planner.cc:4693
double find_cost_for_ref(const THD *thd, TABLE *table, unsigned keyno, double num_rows, double worst_seeks)
Find the cost for a ref lookup on the given index, assumed to return “num_rows” rows.
Definition: sql_planner.cc:142
bool check_interleaving_with_nj(JOIN_TAB *next_tab)
Check interleaving with an inner tables of an outer join for extension table.
Definition: sql_planner.cc:3589
void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab, table_map remaining_tables, Table_ref *sjm_nest, double *newcount, double *newcost)
Find best access paths for semi-join MaterializeScan strategy and calculate rowcount and cost based o...
Definition: sql_planner.cc:3819
void recalculate_lateral_deps(uint first_tab_no)
Set join->deps_of_remaining_lateral_derived_tables to the set of lateral dependencies of the tables i...
Definition: sql_planner.cc:4717
void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab, uint idx)
Do semi-join optimization step after we've added a new tab to join prefix.
Definition: sql_planner.cc:4107
bool best_extension_by_limited_search(table_map remaining_tables, uint idx, uint current_search_depth)
Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search.
Definition: sql_planner.cc:2718
void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg, double *rowcount_arg)
Calculate a cost of given partial join order.
Definition: sql_planner.cc:2448
float calculate_condition_filter(const JOIN_TAB *const tab, const Key_use *const keyuse, table_map used_tables, double fanout, bool is_join_buffering, bool write_to_trace, Opt_trace_object &parent_trace)
Calculate 'Post read filtering' effect of JOIN::conds for table 'tab'.
Definition: sql_planner.cc:1243
table_map eq_ref_extension_by_limited_search(table_map remaining_tables, uint idx, uint current_search_depth)
Heuristic utility used by best_extension_by_limited_search().
Definition: sql_planner.cc:3065
void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab)
Nested joins perspective: Remove the last table from the join order.
Definition: sql_planner.cc:4656
double lateral_derived_cost(const JOIN_TAB *tab, const uint idx, const double prefix_rowcount, const Cost_model_server *cost_model)
If table is a lateral derived table, calculates the "cost of materialization", which is the cost of a...
Definition: sql_planner.cc:915
bool fix_semijoin_strategies()
Fix semi-join strategies for the picked join order.
Definition: sql_planner.cc:3360
bool semijoin_loosescan_fill_driving_table_position(const JOIN_TAB *s, table_map remaining_tables, uint idx, double prefix_rowcount, POSITION *loose_scan_pos)
Fills a POSITION object of the driving table of a semi-join LooseScan range, with the cheapest access...
Definition: sql_planner.cc:1609
Optimize_table_order(THD *thd_arg, JOIN *join_arg, Table_ref *sjm_nest_arg)
Definition: sql_planner.cc:124
bool plan_has_duplicate_tabs() const
Check if any Table_ref appears twice in the plan (which is an error).
Definition: sql_planner.cc:4784
bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab, table_map remaining_tables, bool loosescan, double *newcount, double *newcost)
Find best access paths for semi-join FirstMatch or LooseScan strategy and calculate rowcount and cost...
Definition: sql_planner.cc:3664
bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const
"Less than" comparison function object used to compare two JOIN_TAB objects based on a number of fact...
Definition: sql_planner.cc:1854
bool consider_plan(uint idx, Opt_trace_object *trace_obj)
Cost calculation of another (partial-)QEP has been completed.
Definition: sql_planner.cc:2480
double calculate_scan_cost(const JOIN_TAB *tab, const uint idx, const Key_use *best_ref, const double prefix_rowcount, const bool found_condition, const bool disable_jbuf, double *rows_after_filtering, Opt_trace_object *trace_access_scan)
Calculate the cost of range/table/index scanning table 'tab'.
Definition: sql_planner.cc:769
Key_use * find_best_ref(const JOIN_TAB *tab, const table_map remaining_tables, const uint idx, const double prefix_rowcount, bool *found_condition, table_map *ref_depends_map, uint *used_key_parts)
Find the best index to do 'ref' access on for a table.
Definition: sql_planner.cc:206
void optimize_straight_join(table_map join_tables)
Select the best ways to access the tables in a query without reordering them.
Definition: sql_planner.cc:2117
void best_access_path(JOIN_TAB *tab, const table_map remaining_tables, const uint idx, bool disable_jbuf, const double prefix_rowcount, POSITION *pos)
Find the best access path for an extension of a partial execution plan and add this path to the plan.
Definition: sql_planner.cc:981
bool choose_table_order()
Entry point to table join order optimization.
Definition: sql_planner.cc:1950
Some integer typedefs for easier portability.
unsigned long long int ulonglong
Definition: my_inttypes.h:55
uint64_t table_map
Definition: my_table_map.h:29
static PFS_engine_table_share_proxy table
Definition: pfs.cc:60
std::string join(Container cont, const std::string &delim)
join elements of an container into a string separated by a delimiter.
Definition: string.h:150
ulonglong nested_join_map
Definition: nested_join.h:36
Classes used for query optimizations.
table_map get_lateral_deps(const JOIN_TAB &tab)
Find the lateral dependencies of 'tab'.
Definition: sql_planner.h:52
ulonglong nested_join_map
Definition: sql_planner.h:45
A position of table within a join order.
Definition: sql_select.h:351
Definition: table.h:1394