MySQL  8.0.27
Source Code Documentation
sql_planner.h
Go to the documentation of this file.
1 #ifndef SQL_PLANNER_INCLUDED
2 #define SQL_PLANNER_INCLUDED
3 
4 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License, version 2.0,
8  as published by the Free Software Foundation.
9 
10  This program is also distributed with certain software (including
11  but not limited to OpenSSL) that is licensed under separate terms,
12  as designated in a particular file or component or in included license
13  documentation. The authors of MySQL hereby grant you an additional
14  permission to link the program and your derivative works with the
15  separately licensed software that they have included with MySQL.
16 
17  This program is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License, version 2.0, for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25 
26 /**
27  @file sql/sql_planner.h
28  Join planner classes.
29 */
30 
31 #include <sys/types.h>
32 
33 #include "my_inttypes.h"
34 #include "my_table_map.h"
35 #include "sql_optimizer.h"
36 
37 class Cost_model_server;
38 class JOIN;
39 class JOIN_TAB;
40 class Key_use;
41 class Opt_trace_object;
42 class THD;
43 struct TABLE;
44 struct TABLE_LIST;
45 struct POSITION;
46 
48 
49 /**
50  Find the lateral dependencies of 'tab'.
51 */
52 inline table_map get_lateral_deps(const JOIN_TAB &tab) {
53  return (tab.table_ref != nullptr && tab.table_ref->is_derived())
55  : 0;
56 }
57 
58 /**
59  This class determines the optimal join order for tables within
60  a basic query block, ie a query specification clause, possibly extended
61  with semi-joined tables from embedded subqueries.
62 
63  This class takes as prerequisite a join class where all dependencies among
64  tables have been sorted out, all possible access paths have been
65  sorted out, and all statistics information has been filled in.
66 
67  The class has a sole public function that will calculate the most
68  optimal plan based on the inputs and the environment, such as prune level
69  and greedy optimizer search depth. For more information, see the
70  function headers for the private functions greedy_search(),
71  best_extension_by_limited_search() and eq_ref_extension_by_limited_search().
72 */
73 
75  public:
76  Optimize_table_order(THD *thd_arg, JOIN *join_arg, TABLE_LIST *sjm_nest_arg);
77  ~Optimize_table_order() = default;
78  /**
79  Entry point to table join order optimization.
80  For further description, see class header and private function headers.
81 
82  @return false if successful, true if error
83  */
84  bool choose_table_order();
85 
86  void recalculate_lateral_deps(uint first_tab_no);
87 
89 
90  private:
91  THD *const thd; // Pointer to current THD
92  JOIN *const join; // Pointer to the current plan being developed
93  const uint search_depth; // Maximum search depth to apply in greedy search
94  const uint prune_level; // pruning heuristics to be applied
95  // (0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
96  /**
97  Bitmap of all join nests embedding the last table appended to the current
98  partial join.
99  */
101  /**
102  If non-NULL, we are optimizing a materialized semi-join nest.
103  If NULL, we are optimizing a complete join plan.
104  */
105  const TABLE_LIST *const emb_sjm_nest;
106  /**
107  When calculating a plan for a materialized semi-join nest,
108  best_access_path() needs to know not only the remaining tables within the
109  semi-join nest, but also all tables outside of this nest, because there may
110  be key references between the semi-join nest and the outside tables
111  that should not be considered when materializing the semi-join nest.
112  @c excluded_tables tracks these tables.
113  */
115  /*
116  No need to call advance_sj_state() when
117  1) there are no semijoin nests or
118  2) we are optimizing a materialized semijoin nest.
119  */
120  const bool has_sj;
121 
122  /**
123  If true, find_best_ref() must go through all keys, no shortcutting
124  allowed.
125  */
127 
128  /// True if we found a complete plan using only allowed semijoin strategies.
130 
131  /**
132  False/true at start/end of choose_table_order().
133  Helps member functions know if current plan is in join->positions or
134  join->best_positions.
135  */
137 
138  /// Set true when we have decided to return with a "good enough" plan.
139  bool use_best_so_far{false};
140 
141  inline Key_use *find_best_ref(const JOIN_TAB *tab,
142  const table_map remaining_tables,
143  const uint idx, const double prefix_rowcount,
144  bool *found_condition,
145  table_map *ref_depends_map,
146  uint *used_key_parts);
147  double calculate_scan_cost(const JOIN_TAB *tab, const uint idx,
148  const Key_use *best_ref,
149  const double prefix_rowcount,
150  const bool found_condition,
151  const bool disable_jbuf,
152  double *rows_after_filtering,
153  Opt_trace_object *trace_access_scan);
154  void best_access_path(JOIN_TAB *tab, const table_map remaining_tables,
155  const uint idx, bool disable_jbuf,
156  const double prefix_rowcount, POSITION *pos);
158  const JOIN_TAB *s, table_map remaining_tables, uint idx,
159  double prefix_rowcount, POSITION *loose_scan_pos);
160  bool check_interleaving_with_nj(JOIN_TAB *next_tab);
161  void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab,
162  uint idx);
163  void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab);
164  void optimize_straight_join(table_map join_tables);
165  bool greedy_search(table_map remaining_tables);
166  bool best_extension_by_limited_search(table_map remaining_tables, uint idx,
167  uint current_search_depth);
169  uint idx,
170  uint current_search_depth);
171  bool consider_plan(uint idx, Opt_trace_object *trace_obj);
173  bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab,
174  table_map remaining_tables,
175  bool loosescan,
176  double *newcount,
177  double *newcost);
178  void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab,
179  table_map remaining_tables,
180  TABLE_LIST *sjm_nest, double *newcount,
181  double *newcost);
182  void semijoin_mat_lookup_access_paths(uint last_inner, TABLE_LIST *sjm_nest,
183  double *newcount, double *newcost);
184  void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab,
185  double *newcount, double *newcost);
186 
187  double lateral_derived_cost(const JOIN_TAB *tab, const uint idx,
188  const double prefix_rowcount,
189  const Cost_model_server *cost_model);
190 
192  bool plan_has_duplicate_tabs() const;
193  static uint determine_search_depth(uint search_depth, uint table_count);
194 };
195 
196 void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg,
197  double *rowcount_arg);
198 
199 /**
200  Calculate 'Post read filtering' effect of JOIN::conds for table
201  'tab'. Only conditions that are not directly involved in the chosen
202  access method shall be included in the calculation of this 'Post
203  read filtering' effect.
204 
205  The function first identifies fields that are directly used by the
206  access method. This includes columns used by range and ref access types,
207  and predicates on the identified columns (if any) will not be taken into
208  account when the filtering effect is calculated.
209 
210  The function will then calculate the filtering effect of any predicate
211  that applies to 'tab' and is not depending on the columns used by the
212  access method. The source of information with highest accuracy is
213  always preferred and is as follows:
214  1) Row estimates from the range optimizer
215  2) Row estimates from index statistics (records per key)
216  3) Guesstimates
217 
218  Thus, after identifying columns that are used by the access method,
219  the function will look for rows estimates made by the range optimizer.
220  If found, the estimates from the range optimizer are calculated into
221  the filtering effect.
222 
223  The function then goes through JOIN::conds to get estimates from any
224  remaining predicate that applies to 'tab' and does not depend on any
225  tables that come later in the join sequence. Predicates that depend on
226  columns that are either used by the access method or used in the row
227  estimate from the range optimizer will not be considered in this phase.
228 
229  @param tab The table condition filtering effect is calculated
230  for
231  @param keyuse Describes the 'ref' access method (if any) that is
232  chosen
233  @param used_tables Tables earlier in the join sequence than 'tab'
234  @param fanout The number of rows read by the chosen access
235  method for each row combination of previous tables
236  @param is_join_buffering Whether or not condition filtering is about
237  to be calculated for an access method using join
238  buffering.
239  @param write_to_trace Wheter we should print the filtering effect calculated
240  by histogram statistics and the final aggregated filtering
241  effect to optimizer trace.
242  @param parent_trace The parent trace object where the final aggregated
243  filtering effect will be printed if "write_to_trace" is
244  set to true.
245 
246  @return the 'post read filtering' effect (between 0 and 1) of
247  JOIN::conds
248 */
249 float calculate_condition_filter(const JOIN_TAB *const tab,
250  const Key_use *const keyuse,
251  table_map used_tables, double fanout,
252  bool is_join_buffering, bool write_to_trace,
253  Opt_trace_object &parent_trace);
254 
255 /**
256  Find the cost for a ref lookup on the given index, assumed to return
257  “num_rows” rows. The cost will be capped by “worst_seeks”
258  (see find_worst_seeks()).
259  */
260 double find_cost_for_ref(const THD *thd, TABLE *table, unsigned keyno,
261  double num_rows, double worst_seeks);
262 
264  public:
265  /**
266  "Less than" comparison function object used to compare two JOIN_TAB
267  objects based on a number of factors in this order:
268 
269  - table before another table that depends on it (straight join,
270  outer join etc), then
271  - table before another table that depends on it to use a key
272  as access method, then
273  - table with smallest number of records first, then
274  - the table with lowest-value pointer (i.e., the one located
275  in the lowest memory address) first.
276 
277  @param jt1 first JOIN_TAB object
278  @param jt2 second JOIN_TAB object
279 
280  @note The order relation implemented by Join_tab_compare_default is not
281  transitive, i.e. it is possible to choose a, b and c such that
282  (a @< b) && (b @< c) but (c @< a). This is the case in the
283  following example:
284 
285  a: dependent = @<none@> found_records = 3
286  b: dependent = @<none@> found_records = 4
287  c: dependent = b found_records = 2
288 
289  a @< b: because a has fewer records
290  b @< c: because c depends on b (e.g outer join dependency)
291  c @< a: because c has fewer records
292 
293  This implies that the result of a sort using the relation
294  implemented by Join_tab_compare_default () depends on the order in
295  which elements are compared, i.e. the result is
296  implementation-specific.
297 
298  @return
299  true if jt1 is smaller than jt2, false otherwise
300  */
301  bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const;
302 };
303 
304 #endif /* SQL_PLANNER_INCLUDED */
API for getting cost estimates for server operations that are not directly related to a table object.
Definition: opt_costmodel.h:41
Query optimization plan node.
Definition: sql_select.h:597
TABLE_LIST * table_ref
points to table reference
Definition: sql_select.h:629
Definition: sql_optimizer.h:125
Definition: sql_planner.h:263
A Key_use represents an equality predicate of the form (table.column = val), where the column is inde...
Definition: sql_select.h:173
A JSON object (unordered set of key/value pairs).
Definition: opt_trace.h:801
This class determines the optimal join order for tables within a basic query block,...
Definition: sql_planner.h:74
JOIN *const join
Definition: sql_planner.h:92
bool got_final_plan
False/true at start/end of choose_table_order().
Definition: sql_planner.h:136
bool test_all_ref_keys
If true, find_best_ref() must go through all keys, no shortcutting allowed.
Definition: sql_planner.h:126
const bool has_sj
Definition: sql_planner.h:120
nested_join_map cur_embedding_map
Bitmap of all join nests embedding the last table appended to the current partial join.
Definition: sql_planner.h:100
~Optimize_table_order()=default
bool found_plan_with_allowed_sj
True if we found a complete plan using only allowed semijoin strategies.
Definition: sql_planner.h:129
bool use_best_so_far
Set true when we have decided to return with a "good enough" plan.
Definition: sql_planner.h:139
const TABLE_LIST *const emb_sjm_nest
If non-NULL, we are optimizing a materialized semi-join nest.
Definition: sql_planner.h:105
THD *const thd
Definition: sql_planner.h:91
const uint prune_level
Definition: sql_planner.h:94
const uint search_depth
Definition: sql_planner.h:93
const table_map excluded_tables
When calculating a plan for a materialized semi-join nest, best_access_path() needs to know not only ...
Definition: sql_planner.h:114
table_map m_lateral_deps
If 'this' is body of lateral derived table: map of tables in the same FROM clause as this derived tab...
Definition: sql_lex.h:811
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_class.h:821
char * pos
Definition: do_ctype.cc:76
void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab, double *newcount, double *newcost)
Find best access paths for semi-join DuplicateWeedout strategy and calculate rowcount and cost based ...
Definition: sql_planner.cc:3912
Optimize_table_order(THD *thd_arg, JOIN *join_arg, TABLE_LIST *sjm_nest_arg)
Definition: sql_planner.cc:115
static uint determine_search_depth(uint search_depth, uint table_count)
Heuristic procedure to automatically guess a reasonable degree of exhaustiveness for the greedy searc...
Definition: sql_planner.cc:2045
bool greedy_search(table_map remaining_tables)
Find a good, possibly optimal, query execution plan (QEP) by a greedy search.
Definition: sql_planner.cc:2295
void recalculate_lateral_deps_incrementally(uint first_tab_no)
Update join->deps_of_remaining_lateral_derived_tables after adding JOIN_TAB first_tab_no-1 to the pla...
Definition: sql_planner.cc:4712
void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab, table_map remaining_tables, TABLE_LIST *sjm_nest, double *newcount, double *newcost)
Find best access paths for semi-join MaterializeScan strategy and calculate rowcount and cost based o...
Definition: sql_planner.cc:3785
table_map calculate_lateral_deps_of_final_plan(uint tab_no) const
Calculate the lateral dependencies of the suffix of JOIN_TABs from tab_no to join->tables-1 in the fi...
Definition: sql_planner.cc:4656
double find_cost_for_ref(const THD *thd, TABLE *table, unsigned keyno, double num_rows, double worst_seeks)
Find the cost for a ref lookup on the given index, assumed to return “num_rows” rows.
Definition: sql_planner.cc:133
bool check_interleaving_with_nj(JOIN_TAB *next_tab)
Check interleaving with an inner tables of an outer join for extension table.
Definition: sql_planner.cc:3569
void recalculate_lateral_deps(uint first_tab_no)
Set join->deps_of_remaining_lateral_derived_tables to the set of lateral dependencies of the tables i...
Definition: sql_planner.cc:4680
void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab, uint idx)
Do semi-join optimization step after we've added a new tab to join prefix.
Definition: sql_planner.cc:4070
bool best_extension_by_limited_search(table_map remaining_tables, uint idx, uint current_search_depth)
Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search.
Definition: sql_planner.cc:2696
void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg, double *rowcount_arg)
Calculate a cost of given partial join order.
Definition: sql_planner.cc:2416
float calculate_condition_filter(const JOIN_TAB *const tab, const Key_use *const keyuse, table_map used_tables, double fanout, bool is_join_buffering, bool write_to_trace, Opt_trace_object &parent_trace)
Calculate 'Post read filtering' effect of JOIN::conds for table 'tab'.
Definition: sql_planner.cc:1215
table_map eq_ref_extension_by_limited_search(table_map remaining_tables, uint idx, uint current_search_depth)
Heuristic utility used by best_extension_by_limited_search().
Definition: sql_planner.cc:3045
void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab)
Nested joins perspective: Remove the last table from the join order.
Definition: sql_planner.cc:4619
double lateral_derived_cost(const JOIN_TAB *tab, const uint idx, const double prefix_rowcount, const Cost_model_server *cost_model)
If table is a lateral derived table, calculates the "cost of materialization", which is the cost of a...
Definition: sql_planner.cc:887
bool fix_semijoin_strategies()
Fix semi-join strategies for the picked join order.
Definition: sql_planner.cc:3340
bool semijoin_loosescan_fill_driving_table_position(const JOIN_TAB *s, table_map remaining_tables, uint idx, double prefix_rowcount, POSITION *loose_scan_pos)
Fills a POSITION object of the driving table of a semi-join LooseScan range, with the cheapest access...
Definition: sql_planner.cc:1577
void semijoin_mat_lookup_access_paths(uint last_inner, TABLE_LIST *sjm_nest, double *newcount, double *newcost)
Find best access paths for semi-join MaterializeLookup strategy.
Definition: sql_planner.cc:3867
bool plan_has_duplicate_tabs() const
Check if any TABLE_LIST appears twice in the plan (which is an error).
Definition: sql_planner.cc:4747
bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab, table_map remaining_tables, bool loosescan, double *newcount, double *newcost)
Find best access paths for semi-join FirstMatch or LooseScan strategy and calculate rowcount and cost...
Definition: sql_planner.cc:3644
bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const
"Less than" comparison function object used to compare two JOIN_TAB objects based on a number of fact...
Definition: sql_planner.cc:1822
bool consider_plan(uint idx, Opt_trace_object *trace_obj)
Cost calculation of another (partial-)QEP has been completed.
Definition: sql_planner.cc:2458
double calculate_scan_cost(const JOIN_TAB *tab, const uint idx, const Key_use *best_ref, const double prefix_rowcount, const bool found_condition, const bool disable_jbuf, double *rows_after_filtering, Opt_trace_object *trace_access_scan)
Calculate the cost of range/table/index scanning table 'tab'.
Definition: sql_planner.cc:741
Key_use * find_best_ref(const JOIN_TAB *tab, const table_map remaining_tables, const uint idx, const double prefix_rowcount, bool *found_condition, table_map *ref_depends_map, uint *used_key_parts)
Find the best index to do 'ref' access on for a table.
Definition: sql_planner.cc:192
void optimize_straight_join(table_map join_tables)
Select the best ways to access the tables in a query without reordering them.
Definition: sql_planner.cc:2085
void best_access_path(JOIN_TAB *tab, const table_map remaining_tables, const uint idx, bool disable_jbuf, const double prefix_rowcount, POSITION *pos)
Find the best access path for an extension of a partial execution plan and add this path to the plan.
Definition: sql_planner.cc:952
bool choose_table_order()
Entry point to table join order optimization.
Definition: sql_planner.cc:1918
Some integer typedefs for easier portability.
unsigned long long int ulonglong
Definition: my_inttypes.h:55
uint64_t table_map
Definition: my_table_map.h:29
std::string join(Container cont, const std::string &delim)
join elements of an container into a string separated by a delimiter.
Definition: string.h:144
ulonglong nested_join_map
Definition: nested_join.h:36
Classes used for query optimizations.
table_map get_lateral_deps(const JOIN_TAB &tab)
Find the lateral dependencies of 'tab'.
Definition: sql_planner.h:52
ulonglong nested_join_map
Definition: sql_planner.h:45
A position of table within a join order.
Definition: sql_select.h:350
Definition: table.h:2694
bool is_derived() const
Return true if this represents a derived table (an unnamed view)
Definition: table.h:2960
Query_expression * derived_query_expression() const
Return the query expression of a derived table or view.
Definition: table.h:3166
Definition: table.h:1394
unsigned int uint
Definition: uca-dump.cc:29