MySQL  8.0.19
Source Code Documentation
sql_planner.h
Go to the documentation of this file.
1 #ifndef SQL_PLANNER_INCLUDED
2 #define SQL_PLANNER_INCLUDED
3 
4 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License, version 2.0,
8  as published by the Free Software Foundation.
9 
10  This program is also distributed with certain software (including
11  but not limited to OpenSSL) that is licensed under separate terms,
12  as designated in a particular file or component or in included license
13  documentation. The authors of MySQL hereby grant you an additional
14  permission to link the program and your derivative works with the
15  separately licensed software that they have included with MySQL.
16 
17  This program is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License, version 2.0, for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25 
26 /**
27  @file sql/sql_planner.h
28  Join planner classes.
29 */
30 
31 #include <sys/types.h>
32 
33 #include "my_inttypes.h"
34 #include "my_table_map.h"
35 
36 class Cost_model_server;
37 class JOIN;
38 class JOIN_TAB;
39 class Key_use;
40 class Opt_trace_object;
41 class THD;
42 struct TABLE_LIST;
43 struct POSITION;
44 
46 
47 /**
48  This class determines the optimal join order for tables within
49  a basic query block, ie a query specification clause, possibly extended
50  with semi-joined tables from embedded subqueries.
51 
52  This class takes as prerequisite a join class where all dependencies among
53  tables have been sorted out, all possible access paths have been
54  sorted out, and all statistics information has been filled in.
55 
56  The class has a sole public function that will calculate the most
57  optimal plan based on the inputs and the environment, such as prune level
58  and greedy optimizer search depth. For more information, see the
59  function headers for the private functions greedy_search(),
60  best_extension_by_limited_search() and eq_ref_extension_by_limited_search().
61 */
62 
64  public:
65  Optimize_table_order(THD *thd_arg, JOIN *join_arg, TABLE_LIST *sjm_nest_arg);
67  /**
68  Entry point to table join order optimization.
69  For further description, see class header and private function headers.
70 
71  @return false if successful, true if error
72  */
73  bool choose_table_order();
74 
75  private:
76  THD *const thd; // Pointer to current THD
77  JOIN *const join; // Pointer to the current plan being developed
78  const uint search_depth; // Maximum search depth to apply in greedy search
79  const uint prune_level; // pruning heuristics to be applied
80  // (0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
81  /**
82  Bitmap of all join nests embedding the last table appended to the current
83  partial join.
84  */
86  /**
87  If non-NULL, we are optimizing a materialized semi-join nest.
88  If NULL, we are optimizing a complete join plan.
89  */
90  const TABLE_LIST *const emb_sjm_nest;
91  /**
92  When calculating a plan for a materialized semi-join nest,
93  best_access_path() needs to know not only the remaining tables within the
94  semi-join nest, but also all tables outside of this nest, because there may
95  be key references between the semi-join nest and the outside tables
96  that should not be considered when materializing the semi-join nest.
97  @c excluded_tables tracks these tables.
98  */
100  /*
101  No need to call advance_sj_state() when
102  1) there are no semijoin nests or
103  2) we are optimizing a materialized semijoin nest.
104  */
105  const bool has_sj;
106 
107  /**
108  If true, find_best_ref() must go through all keys, no shortcutting
109  allowed.
110  */
112 
113  /// True if we found a complete plan using only allowed semijoin strategies.
115 
116  /**
117  False/true at start/end of choose_table_order().
118  Helps member functions know if current plan is in join->positions or
119  join->best_positions.
120  */
122 
123  inline Key_use *find_best_ref(const JOIN_TAB *tab,
124  const table_map remaining_tables,
125  const uint idx, const double prefix_rowcount,
126  bool *found_condition,
127  table_map *ref_depends_map,
128  uint *used_key_parts);
129  double calculate_scan_cost(const JOIN_TAB *tab, const uint idx,
130  const Key_use *best_ref,
131  const double prefix_rowcount,
132  const bool found_condition,
133  const bool disable_jbuf,
134  double *rows_after_filtering,
135  Opt_trace_object *trace_access_scan);
136  void best_access_path(JOIN_TAB *tab, const table_map remaining_tables,
137  const uint idx, bool disable_jbuf,
138  const double prefix_rowcount, POSITION *pos);
140  const JOIN_TAB *s, table_map remaining_tables, uint idx,
141  double prefix_rowcount, POSITION *loose_scan_pos);
142  bool check_interleaving_with_nj(JOIN_TAB *next_tab);
143  void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab,
144  uint idx);
145  void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab);
146  void optimize_straight_join(table_map join_tables);
147  bool greedy_search(table_map remaining_tables);
148  bool best_extension_by_limited_search(table_map remaining_tables, uint idx,
149  uint current_search_depth);
151  uint idx,
152  uint current_search_depth);
153  bool consider_plan(uint idx, Opt_trace_object *trace_obj);
155  bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab,
156  table_map remaining_tables,
157  bool loosescan,
158  double *newcount,
159  double *newcost);
160  void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab,
161  table_map remaining_tables,
162  TABLE_LIST *sjm_nest, double *newcount,
163  double *newcost);
164  void semijoin_mat_lookup_access_paths(uint last_inner, TABLE_LIST *sjm_nest,
165  double *newcount, double *newcost);
166  void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab,
167  double *newcount, double *newcost);
168 
169  double lateral_derived_cost(const JOIN_TAB *tab, const uint idx,
170  const double prefix_rowcount,
171  const Cost_model_server *cost_model);
172 
173  static uint determine_search_depth(uint search_depth, uint table_count);
174 };
175 
176 void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg,
177  double *rowcount_arg);
178 
179 /**
180  Calculate 'Post read filtering' effect of JOIN::conds for table
181  'tab'. Only conditions that are not directly involved in the chosen
182  access method shall be included in the calculation of this 'Post
183  read filtering' effect.
184 
185  The function first identifies fields that are directly used by the
186  access method. This includes columns used by range and ref access types,
187  and predicates on the identified columns (if any) will not be taken into
188  account when the filtering effect is calculated.
189 
190  The function will then calculate the filtering effect of any predicate
191  that applies to 'tab' and is not depending on the columns used by the
192  access method. The source of information with highest accuracy is
193  always preferred and is as follows:
194  1) Row estimates from the range optimizer
195  2) Row estimates from index statistics (records per key)
196  3) Guesstimates
197 
198  Thus, after identifying columns that are used by the access method,
199  the function will look for rows estimates made by the range optimizer.
200  If found, the estimates from the range optimizer are calculated into
201  the filtering effect.
202 
203  The function then goes through JOIN::conds to get estimates from any
204  remaining predicate that applies to 'tab' and does not depend on any
205  tables that come later in the join sequence. Predicates that depend on
206  columns that are either used by the access method or used in the row
207  estimate from the range optimizer will not be considered in this phase.
208 
209  @param tab The table condition filtering effect is calculated
210  for
211  @param keyuse Describes the 'ref' access method (if any) that is
212  chosen
213  @param used_tables Tables earlier in the join sequence than 'tab'
214  @param fanout The number of rows read by the chosen access
215  method for each row combination of previous tables
216  @param is_join_buffering Whether or not condition filtering is about
217  to be calculated for an access method using join
218  buffering.
219  @param write_to_trace Wheter we should print the filtering effect calculated
220  by histogram statistics and the final aggregated filtering
221  effect to optimizer trace.
222  @param parent_trace The parent trace object where the final aggregated
223  filtering effect will be printed if "write_to_trace" is
224  set to true.
225 
226  @return the 'post read filtering' effect (between 0 and 1) of
227  JOIN::conds
228 */
229 float calculate_condition_filter(const JOIN_TAB *const tab,
230  const Key_use *const keyuse,
231  table_map used_tables, double fanout,
232  bool is_join_buffering, bool write_to_trace,
233  Opt_trace_object &parent_trace);
234 
235 /**
236  "Less than" comparison function object used to compare two JOIN_TAB
237  objects based on a number of factors in this order:
238 
239  - table before another table that depends on it (straight join,
240  outer join etc), then
241  - table before another table that depends on it to use a key
242  as access method, then
243  - table with smallest number of records first, then
244  - the table with lowest-value pointer (i.e., the one located
245  in the lowest memory address) first.
246 
247  @param jt1 first JOIN_TAB object
248  @param jt2 second JOIN_TAB object
249 
250  @note The order relation implemented by Join_tab_compare_default is not
251  transitive, i.e. it is possible to choose a, b and c such that
252  (a @< b) && (b @< c) but (c @< a). This is the case in the
253  following example:
254 
255  a: dependent = @<none@> found_records = 3
256  b: dependent = @<none@> found_records = 4
257  c: dependent = b found_records = 2
258 
259  a @< b: because a has fewer records
260  b @< c: because c depends on b (e.g outer join dependency)
261  c @< a: because c has fewer records
262 
263  This implies that the result of a sort using the relation
264  implemented by Join_tab_compare_default () depends on the order in
265  which elements are compared, i.e. the result is
266  implementation-specific.
267 
268  @return
269  true if jt1 is smaller than jt2, false otherwise
270 */
272  public:
273  bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const;
274 };
275 
276 #endif /* SQL_PLANNER_INCLUDED */
Optimize_table_order::semijoin_mat_lookup_access_paths
void semijoin_mat_lookup_access_paths(uint last_inner, TABLE_LIST *sjm_nest, double *newcount, double *newcost)
Find best access paths for semi-join MaterializeLookup strategy.
Definition: sql_planner.cc:3863
THD
Definition: sql_class.h:764
nested_join_map
ulonglong nested_join_map
Definition: sql_planner.h:43
Optimize_table_order::determine_search_depth
static uint determine_search_depth(uint search_depth, uint table_count)
Heuristic procedure to automatically guess a reasonable degree of exhaustiveness for the greedy searc...
Definition: sql_planner.cc:2052
nested_join_map
ulonglong nested_join_map
Definition: nested_join.h:36
Optimize_table_order::Optimize_table_order
Optimize_table_order(THD *thd_arg, JOIN *join_arg, TABLE_LIST *sjm_nest_arg)
Definition: sql_planner.cc:118
Optimize_table_order::fix_semijoin_strategies
bool fix_semijoin_strategies()
Fix semi-join strategies for the picked join order.
Definition: sql_planner.cc:3336
pos
char * pos
Definition: do_ctype.cc:76
Optimize_table_order::optimize_straight_join
void optimize_straight_join(table_map join_tables)
Select the best ways to access the tables in a query without reordering them.
Definition: sql_planner.cc:2092
Optimize_table_order::cur_embedding_map
nested_join_map cur_embedding_map
Bitmap of all join nests embedding the last table appended to the current partial join.
Definition: sql_planner.h:85
mysql_harness::join
std::string join(Container cont, const std::string &delim)
join elements of an container into a string seperated by a delimiter.
Definition: string.h:144
Optimize_table_order::thd
THD *const thd
Definition: sql_planner.h:76
Cost_model_server
API for getting cost estimates for server operations that are not directly related to a table object.
Definition: opt_costmodel.h:41
Optimize_table_order::find_best_ref
Key_use * find_best_ref(const JOIN_TAB *tab, const table_map remaining_tables, const uint idx, const double prefix_rowcount, bool *found_condition, table_map *ref_depends_map, uint *used_key_parts)
Find the best index to do 'ref' access on for a table.
Definition: sql_planner.cc:177
calculate_condition_filter
float calculate_condition_filter(const JOIN_TAB *const tab, const Key_use *const keyuse, table_map used_tables, double fanout, bool is_join_buffering, bool write_to_trace, Opt_trace_object &parent_trace)
Calculate 'Post read filtering' effect of JOIN::conds for table 'tab'.
Definition: sql_planner.cc:1223
Optimize_table_order::test_all_ref_keys
bool test_all_ref_keys
If true, find_best_ref() must go through all keys, no shortcutting allowed.
Definition: sql_planner.h:111
Optimize_table_order::got_final_plan
bool got_final_plan
False/true at start/end of choose_table_order().
Definition: sql_planner.h:121
Optimize_table_order::advance_sj_state
void advance_sj_state(table_map remaining_tables, const JOIN_TAB *tab, uint idx)
Do semi-join optimization step after we've added a new tab to join prefix.
Definition: sql_planner.cc:4066
Optimize_table_order::check_interleaving_with_nj
bool check_interleaving_with_nj(JOIN_TAB *next_tab)
Check interleaving with an inner tables of an outer join for extension table.
Definition: sql_planner.cc:3567
Optimize_table_order::~Optimize_table_order
~Optimize_table_order()
Definition: sql_planner.h:66
JOIN
Definition: sql_optimizer.h:174
my_inttypes.h
Optimize_table_order::consider_plan
bool consider_plan(uint idx, Opt_trace_object *trace_obj)
Cost calculation of another (partial-)QEP has been completed.
Definition: sql_planner.cc:2455
Optimize_table_order::emb_sjm_nest
const TABLE_LIST *const emb_sjm_nest
If non-NULL, we are optimizing a materialized semi-join nest.
Definition: sql_planner.h:90
Optimize_table_order::semijoin_dupsweedout_access_paths
void semijoin_dupsweedout_access_paths(uint first_tab, uint last_tab, double *newcount, double *newcost)
Find best access paths for semi-join DuplicateWeedout strategy and calculate rowcount and cost based ...
Definition: sql_planner.cc:3908
uint
unsigned int uint
Definition: uca-dump.cc:29
get_partial_join_cost
void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg, double *rowcount_arg)
Calculate a cost of given partial join order.
Definition: sql_planner.cc:2413
Optimize_table_order::semijoin_firstmatch_loosescan_access_paths
bool semijoin_firstmatch_loosescan_access_paths(uint first_tab, uint last_tab, table_map remaining_tables, bool loosescan, double *newcount, double *newcost)
Find best access paths for semi-join FirstMatch or LooseScan strategy and calculate rowcount and cost...
Definition: sql_planner.cc:3642
Optimize_table_order::best_access_path
void best_access_path(JOIN_TAB *tab, const table_map remaining_tables, const uint idx, bool disable_jbuf, const double prefix_rowcount, POSITION *pos)
Find the best access path for an extension of a partial execution plan and add this path to the plan.
Definition: sql_planner.cc:967
TABLE_LIST
Definition: table.h:2467
Optimize_table_order::lateral_derived_cost
double lateral_derived_cost(const JOIN_TAB *tab, const uint idx, const double prefix_rowcount, const Cost_model_server *cost_model)
If table is a lateral derived table, calculates the "cost of materialization", which is the cost of a...
Definition: sql_planner.cc:902
table_map
uint64_t table_map
Definition: my_table_map.h:30
Optimize_table_order::choose_table_order
bool choose_table_order()
Entry point to table join order optimization.
Definition: sql_planner.cc:1926
Optimize_table_order
This class determines the optimal join order for tables within a basic query block,...
Definition: sql_planner.h:63
Optimize_table_order::excluded_tables
const table_map excluded_tables
When calculating a plan for a materialized semi-join nest, best_access_path() needs to know not only ...
Definition: sql_planner.h:99
my_table_map.h
Optimize_table_order::prune_level
const uint prune_level
Definition: sql_planner.h:79
Optimize_table_order::greedy_search
bool greedy_search(table_map remaining_tables)
Find a good, possibly optimal, query execution plan (QEP) by a greedy search.
Definition: sql_planner.cc:2301
Optimize_table_order::backout_nj_state
void backout_nj_state(const table_map remaining_tables, const JOIN_TAB *tab)
Nested joins perspective: Remove the last table from the join order.
Definition: sql_planner.cc:4615
Optimize_table_order::semijoin_mat_scan_access_paths
void semijoin_mat_scan_access_paths(uint last_inner_tab, uint last_outer_tab, table_map remaining_tables, TABLE_LIST *sjm_nest, double *newcount, double *newcost)
Find best access paths for semi-join MaterializeScan strategy and calculate rowcount and cost based o...
Definition: sql_planner.cc:3782
Optimize_table_order::eq_ref_extension_by_limited_search
table_map eq_ref_extension_by_limited_search(table_map remaining_tables, uint idx, uint current_search_depth)
Heuristic utility used by best_extension_by_limited_search().
Definition: sql_planner.cc:3043
Optimize_table_order::found_plan_with_allowed_sj
bool found_plan_with_allowed_sj
True if we found a complete plan using only allowed semijoin strategies.
Definition: sql_planner.h:114
Join_tab_compare_default::operator()
bool operator()(const JOIN_TAB *jt1, const JOIN_TAB *jt2) const
Definition: sql_planner.cc:1830
Key_use
A Key_use represents an equality predicate of the form (table.column = val), where the column is inde...
Definition: sql_select.h:168
Opt_trace_object
A JSON object (unordered set of key/value pairs).
Definition: opt_trace.h:813
Optimize_table_order::join
JOIN *const join
Definition: sql_planner.h:77
Optimize_table_order::semijoin_loosescan_fill_driving_table_position
bool semijoin_loosescan_fill_driving_table_position(const JOIN_TAB *s, table_map remaining_tables, uint idx, double prefix_rowcount, POSITION *loose_scan_pos)
Fills a POSITION object of the driving table of a semi-join LooseScan range, with the cheapest access...
Definition: sql_planner.cc:1585
ulonglong
unsigned long long int ulonglong
Definition: my_inttypes.h:55
JOIN_TAB
Query optimization plan node.
Definition: sql_select.h:579
Optimize_table_order::best_extension_by_limited_search
bool best_extension_by_limited_search(table_map remaining_tables, uint idx, uint current_search_depth)
Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search.
Definition: sql_planner.cc:2695
POSITION
A position of table within a join order.
Definition: sql_select.h:343
Join_tab_compare_default
"Less than" comparison function object used to compare two JOIN_TAB objects based on a number of fact...
Definition: sql_planner.h:271
Optimize_table_order::calculate_scan_cost
double calculate_scan_cost(const JOIN_TAB *tab, const uint idx, const Key_use *best_ref, const double prefix_rowcount, const bool found_condition, const bool disable_jbuf, double *rows_after_filtering, Opt_trace_object *trace_access_scan)
Calculate the cost of range/table/index scanning table 'tab'.
Definition: sql_planner.cc:755
Optimize_table_order::search_depth
const uint search_depth
Definition: sql_planner.h:78
Optimize_table_order::has_sj
const bool has_sj
Definition: sql_planner.h:105