MySQL 9.6.0
Source Code Documentation
make_join_hypergraph.h
Go to the documentation of this file.
1/* Copyright (c) 2020, 2025, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24#ifndef SQL_JOIN_OPTIMIZER_MAKE_JOIN_HYPERGRAPH
25#define SQL_JOIN_OPTIMIZER_MAKE_JOIN_HYPERGRAPH 1
26
27#include <assert.h>
28
29#include <algorithm>
30#include <array>
31#include <cstddef>
32#include <optional>
33#include <span>
34#include <string>
35#include <unordered_map>
36#include <utility>
37
38#include "map_helpers.h"
39#include "my_table_map.h"
40#include "sql/item.h"
46#include "sql/mem_root_array.h"
47#include "sql/sql_const.h"
48
49class Field;
50class JOIN;
51class Query_block;
52class THD;
53struct MEM_ROOT;
55struct TABLE;
56
57/**
58 A sargable (from “Search ARGument”) predicate is one that we can attempt
59 to push down into an index (what we'd call “ref access” or “index range
60 scan”/“quick”). This structure denotes one such instance, precomputed from
61 all the predicates in the given hypergraph.
62 */
64 // Index into the “predicates” array in the graph.
66
67 // The predicate is assumed to be <field> = <other_side>.
68 // Later, we could push down other kinds of relations, such as
69 // greater-than.
72
73 /// True if it is safe to evaluate "other_side" during optimization. It must
74 /// be constant during execution. Also, it should not contain subqueries or
75 /// stored procedures, which we do not want to execute during optimization.
77};
78
79/// Information about a join condition that can potentially be pushed down as a
80/// sargable predicate for a Node.
82 /// The condition that may be pushed as a sargable predicate.
84 /// The relational expression from which this condition is pushable.
86};
87
88/**
89 A struct containing a join hypergraph of a single query block, encapsulating
90 the constraints given by the relational expressions (e.g. inner joins are
91 more freely reorderable than outer joins).
92
93 Since the Hypergraph class does not carry any payloads for nodes and edges,
94 and we need to associate e.g. TABLE pointers with each node, we store our
95 extra data in “nodes” and “edges”, indexed the same way the hypergraph is
96 indexed.
97 */
100 : graph(mem_root),
106
108
109 /// Flags set when AccessPaths are proposed to secondary engines for costing.
110 /// The intention of these flags is to avoid traversing the AccessPath tree to
111 /// check for certain criteria.
112 /// TODO (tikoldit) Move to JOIN or Secondary_engine_execution_context, so
113 /// that JoinHypergraph can be immutable during planning
115
116 // Maps table->tableno() to an index in “nodes”, also suitable for
117 // a bit index in a NodeMap. This is normally the identity mapping,
118 // except for when scalar-to-derived conversion is active.
119 std::array<int, MAX_TABLES> table_num_to_node_num;
120
121 class Node final {
122 public:
124 : m_table{table},
128 assert(mem_root != nullptr);
129 assert(table != nullptr);
130 }
131
132 TABLE *table() const { return m_table; }
133
134 void AddSargable(const SargablePredicate &predicate) {
135 assert(predicate.predicate_index >= 0);
136 assert(predicate.field != nullptr);
137 assert(predicate.other_side != nullptr);
138 m_sargable_predicates.push_back(predicate);
139 }
140
143 }
144
145 /**
146 Add a join condition that is potentially pushable as a sargable predicate
147 for this node.
148
149 @param cond The condition to be considered for pushdown.
150 @param from The relational expression from which this condition
151 originates.
152 */
153 void AddPushable(Item *cond, const RelationalExpression *from) {
154 // Don't add duplicate conditions, as this causes their selectivity to
155 // be applied multiple times, giving poor row estimates (cf.
156 // bug#36135001).
157 if (std::ranges::none_of(m_pushable_conditions,
158 [&](const PushableJoinCondition &other) {
159 return ItemsAreEqual(cond, other.cond);
160 })) {
161 m_pushable_conditions.push_back({.cond = cond, .from = from});
162 }
163 }
164
167 }
168
171 }
172
174 m_lateral_dependencies = dependencies;
175 }
176
177 int64_t read_set_width() const { return m_read_set_width; }
178
179 /* Estimated rows cardinality after table filters. */
180 std::optional<double> cardinality;
181
182 private:
184 // List of all sargable predicates (see SargablePredicate) where
185 // the field is part of this table. When we see the node for
186 // the first time, we will evaluate all of these and consider
187 // creating access paths that exploit these predicates.
189
190 // Join conditions that are potentially pushable to this node
191 // as sargable predicates (if they are sargable, they will be
192 // added to sargable_predicates below, together with sargable
193 // non-join conditions). This is a verbatim copy of
194 // the m_pushable_conditions member in RelationalExpression,
195 // which is computed as a side effect during join pushdown.
196 // (We could in principle have gone and collected all join conditions
197 // ourselves when determining sargable conditions, but there would be
198 // a fair amount of duplicated code in determining pushability,
199 // which is why regular join pushdown does the computation.)
201
202 // The lateral dependencies of this table. That is, the set of tables that
203 // must be available on the outer side of a nested loop join in which this
204 // table is on the inner side. This map may be set for LATERAL derived
205 // tables and derived tables with outer references, and for table functions.
207
208 // The estimated size (in bytes) of a row. This is used when making cost
209 // estimates for hash joins. It is cached here to avoid computing it
210 // repeatedly.
212 };
214
215 // Note that graph.edges contain each edge twice (see Hypergraph
216 // for more information), so edges[i] corresponds to graph.edges[i*2].
218
219 // The first <num_where_predicates> are filter predicates. These are the
220 // predicates that may be added as filters on nodes in the join tree by
221 // setting the corresponding bit in AccessPath::filter_predicates, which at
222 // the end of join optimization gets expanded to proper FILTER access paths by
223 // ExpandFilterAccessPaths(). Despite the name "num_where_predicates", they
224 // are not necessarily WHERE predicates. They include:
225 //
226 // - Actual WHERE predicates that could not be pushed down into one of the
227 // join conditions.
228 //
229 // - Predicates that could be pushed all the way down and become a table
230 // filter. These could be WHERE predicates, but they could also be
231 // predicates that are possible to push all the way down, but not possible
232 // to pull all the way up. Take for example "SELECT 1 FROM t1 LEFT JOIN t2
233 // ON t1.a=t2.a AND t2.b=1". The t2.b=1 predicate can be pushed down as a
234 // table filter, but it cannot be used as a WHERE predicate, as it would
235 // incorrectly filter out the NULL-complemented rows. Still, such table
236 // filters are also counted in "num_where_predicates".
237 //
238 // - Predicates that are join conditions in some inner join that is involved
239 // in a cycle in the join hypergraph. These are applied as filters in the
240 // join tree if the tables are joined via another edge in the cycle. Such
241 // predicates are also not necessarily possible to pull up to the WHERE
242 // clause. If they for example came from an inner join on the inner side of
243 // some outer join, they cannot be applied as WHERE predicates. Even so,
244 // they are still counted in "num_where_predicates".
245 //
246 // The rest are sargable join predicates. The latter are in the array
247 // solely so they can be part of the regular “applied_filters” bitmap
248 // if they are pushed down into an index, so that we know that we
249 // don't need to apply them as join conditions later.
250 //
251 // If a sargable join predicate comes from a join that is part of a cycle in
252 // the hypergraph, it could be present in both partitions of the array.
254
255 // How many of the predicates in "predicates" are filter predicates. The rest
256 // of them are sargable join predicates.
258
259 /// Returns an immutable view of the filter predicates portion of the
260 /// predicates array.
261 std::span<const Predicate> filter_predicates() const {
262 return {predicates.data(), num_where_predicates};
263 }
264
265 /// Returns a mutable view of the filter predicates portion of the predicates
266 /// array.
267 std::span<Predicate> filter_predicates() {
268 return {predicates.data(), num_where_predicates};
269 }
270
271 // A bitmap over predicates that are, or contain, at least one
272 // materializable subquery.
274
275 /// Returns a pointer to the query block that is being planned.
276 const Query_block *query_block() const { return m_query_block; }
277
278 /// Returns a pointer to the JOIN object of the query block being planned.
279 const JOIN *join() const;
280
281 /// Whether, at any point, we could rewrite (t1 LEFT JOIN t2) LEFT JOIN t3
282 /// to t1 LEFT JOIN (t2 LEFT JOIN t3) or vice versa. We record this purely to
283 /// note that we have a known bug/inconsistency in row count estimation
284 /// in this case. Bug #33550360 has a test case, but to sum up:
285 /// Assume t1 and t3 has 25 rows, but t2 has zero rows, and selectivities
286 /// are 0.1. As long as we clamp the row count in FindOutputRowsForJoin(),
287 /// and do not modify these selectivities somehow, the former would give
288 /// 62.5 rows, and the second would give 25 rows. This should be fixed
289 /// eventually, but for now, at least we register it, so that we do not
290 /// assert-fail on inconsistent row counts if this (known) issue could be
291 /// the root cause.
293
294 // True if estimates for one or more Nodes in the graph have been provided
295 // by the secondary engine (i.e. at least one Node has the field `cardinality`
296 // set).
298
299 /// The set of nodes that are on the inner side of some outer join.
301
302 /// The set of nodes that are on the inner side of some semijoin.
304
305 /// The set of nodes that are on the inner side of some antijoin.
307
308 /// The set of nodes that are represented by table_function. This will be set
309 /// only when optimizing for secondary engine.
311
312 int FindSargableJoinPredicate(const Item *predicate) const {
313 const auto iter = m_sargable_join_predicates.find(predicate);
314 return iter == m_sargable_join_predicates.cend() ? -1 : iter->second;
315 }
316
317 void AddSargableJoinPredicate(const Item *predicate, int position) {
318 m_sargable_join_predicates.emplace(predicate, position);
319 }
320
321 private:
322 // For each sargable join condition, maps into its index in “predicates”.
323 // We need the predicate index when applying the join to figure out whether
324 // we have already applied the predicate or not; see
325 // {applied,subsumed}_sargable_join_predicates in AccessPath.
327
328 /// A pointer to the query block being planned.
330};
331
332/**
333 Make a join hypergraph from the query block given by “graph->query_block”,
334 converting from MySQL's join list structures to the ones expected
335 by the hypergraph join optimizer. This includes pushdown of WHERE
336 predicates, and detection of conditions suitable for hash join.
337 However, it does not include simplification of outer to inner joins;
338 that is presumed to have happened earlier.
339
340 The result is suitable for running DPhyp (subgraph_enumeration.h)
341 to find optimal join planning.
342 */
343bool MakeJoinHypergraph(THD *thd, JoinHypergraph *graph,
344 bool *where_is_always_false);
345
346// Exposed for testing only.
348 JoinHypergraph *graph);
349
352 const std::array<int, MAX_TABLES> &table_num_to_node_num);
353
354std::string PrintDottyHypergraph(const JoinHypergraph &graph);
355
356/// Estimates the size of the hash join keys generated from the equi-join
357/// predicates in "expr".
359
361
362#endif // SQL_JOIN_OPTIMIZER_MAKE_JOIN_HYPERGRAPH
Definition: field.h:570
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:928
Definition: sql_optimizer.h:133
Definition: make_join_hypergraph.h:121
void AddSargable(const SargablePredicate &predicate)
Definition: make_join_hypergraph.h:134
const Mem_root_array< SargablePredicate > & sargable_predicates() const
Definition: make_join_hypergraph.h:141
TABLE * m_table
Definition: make_join_hypergraph.h:183
Mem_root_array< SargablePredicate > m_sargable_predicates
Definition: make_join_hypergraph.h:188
int64_t m_read_set_width
Definition: make_join_hypergraph.h:211
hypergraph::NodeMap lateral_dependencies() const
Definition: make_join_hypergraph.h:169
hypergraph::NodeMap m_lateral_dependencies
Definition: make_join_hypergraph.h:206
Node(MEM_ROOT *mem_root, TABLE *table, int64_t read_set_width)
Definition: make_join_hypergraph.h:123
Mem_root_array< PushableJoinCondition > m_pushable_conditions
Definition: make_join_hypergraph.h:200
int64_t read_set_width() const
Definition: make_join_hypergraph.h:177
const Mem_root_array< PushableJoinCondition > & pushable_conditions() const
Definition: make_join_hypergraph.h:165
TABLE * table() const
Definition: make_join_hypergraph.h:132
std::optional< double > cardinality
Definition: make_join_hypergraph.h:180
void AddPushable(Item *cond, const RelationalExpression *from)
Add a join condition that is potentially pushable as a sargable predicate for this node.
Definition: make_join_hypergraph.h:153
void set_lateral_dependencies(hypergraph::NodeMap dependencies)
Definition: make_join_hypergraph.h:173
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:432
Definition: overflow_bitset.h:81
This class represents a query block, aka a query specification, which is a query consisting of a SELE...
Definition: sql_lex.h:1179
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
std::unordered_map, but allocated on a MEM_ROOT.
Definition: map_helpers.h:291
static MEM_ROOT mem_root
Definition: client_plugin.cc:114
Definition of an undirected (join) hypergraph.
bool ItemsAreEqual(const Item *a, const Item *b)
Returns true iff the two items are equal, as in a->eq(b), after unwrapping refs and Item_cache object...
Definition: item.cc:11568
bool MakeJoinHypergraph(THD *thd, JoinHypergraph *graph, bool *where_is_always_false)
Make a join hypergraph from the query block given by “graph->query_block”, converting from MySQL's jo...
Definition: make_join_hypergraph.cc:3801
hypergraph::NodeMap GetNodeMapFromTableMap(table_map table_map, const std::array< int, MAX_TABLES > &table_num_to_node_num)
std::string PrintDottyHypergraph(const JoinHypergraph &graph)
For the given hypergraph, make a textual representation in the form of a dotty graph.
Definition: make_join_hypergraph.cc:2611
void MakeJoinGraphFromRelationalExpression(THD *thd, RelationalExpression *expr, JoinHypergraph *graph)
Convert a join rooted at “expr” into a join hypergraph that encapsulates the constraints given by the...
Definition: make_join_hypergraph.cc:3314
table_map GetVisibleTables(const RelationalExpression *expr)
Definition: make_join_hypergraph.cc:4088
size_t EstimateHashJoinKeyWidth(const RelationalExpression *expr)
Estimates the size of the hash join keys generated from the equi-join predicates in "expr".
Definition: make_join_hypergraph.cc:2710
uint64_t table_map
Definition: my_table_map.h:30
uint64_t NodeMap
Since our graphs can never have more than 61 tables, node sets and edge lists are implemented using 6...
Definition: node_map.h:40
OverflowBitset is a fixed-size (once allocated) bitmap that is optimized for the common case of few e...
For updating an AccessPath's costs by a secondary engine, i.e.
uint64_t SecondaryEngineCostingFlags
Definition: secondary_engine_costing_flags.h:39
File containing constants that can be used throughout the server.
A struct containing a join hypergraph of a single query block, encapsulating the constraints given by...
Definition: make_join_hypergraph.h:98
unsigned num_where_predicates
Definition: make_join_hypergraph.h:257
hypergraph::NodeMap nodes_inner_to_outer_join
The set of nodes that are on the inner side of some outer join.
Definition: make_join_hypergraph.h:300
hypergraph::NodeMap nodes_for_table_function
The set of nodes that are represented by table_function.
Definition: make_join_hypergraph.h:310
Mem_root_array< Node > nodes
Definition: make_join_hypergraph.h:213
std::span< Predicate > filter_predicates()
Returns a mutable view of the filter predicates portion of the predicates array.
Definition: make_join_hypergraph.h:267
hypergraph::NodeMap nodes_inner_to_antijoin
The set of nodes that are on the inner side of some antijoin.
Definition: make_join_hypergraph.h:306
bool has_reordered_left_joins
Whether, at any point, we could rewrite (t1 LEFT JOIN t2) LEFT JOIN t3 to t1 LEFT JOIN (t2 LEFT JOIN ...
Definition: make_join_hypergraph.h:292
void AddSargableJoinPredicate(const Item *predicate, int position)
Definition: make_join_hypergraph.h:317
hypergraph::Hypergraph graph
Definition: make_join_hypergraph.h:107
hypergraph::NodeMap nodes_inner_to_semijoin
The set of nodes that are on the inner side of some semijoin.
Definition: make_join_hypergraph.h:303
const Query_block * query_block() const
Returns a pointer to the query block that is being planned.
Definition: make_join_hypergraph.h:276
JoinHypergraph(MEM_ROOT *mem_root, const Query_block *query_block)
Definition: make_join_hypergraph.h:99
const JOIN * join() const
Returns a pointer to the JOIN object of the query block being planned.
Definition: make_join_hypergraph.cc:3799
const Query_block * m_query_block
A pointer to the query block being planned.
Definition: make_join_hypergraph.h:329
Mem_root_array< Predicate > predicates
Definition: make_join_hypergraph.h:253
int FindSargableJoinPredicate(const Item *predicate) const
Definition: make_join_hypergraph.h:312
OverflowBitset materializable_predicates
Definition: make_join_hypergraph.h:273
SecondaryEngineCostingFlags secondary_engine_costing_flags
Flags set when AccessPaths are proposed to secondary engines for costing.
Definition: make_join_hypergraph.h:114
mem_root_unordered_map< const Item *, int > m_sargable_join_predicates
Definition: make_join_hypergraph.h:326
std::array< int, MAX_TABLES > table_num_to_node_num
Definition: make_join_hypergraph.h:119
std::span< const Predicate > filter_predicates() const
Returns an immutable view of the filter predicates portion of the predicates array.
Definition: make_join_hypergraph.h:261
bool has_estimates_from_secondary_engine
Definition: make_join_hypergraph.h:297
Mem_root_array< JoinPredicate > edges
Definition: make_join_hypergraph.h:217
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:83
Information about a join condition that can potentially be pushed down as a sargable predicate for a ...
Definition: make_join_hypergraph.h:81
const RelationalExpression * from
The relational expression from which this condition is pushable.
Definition: make_join_hypergraph.h:85
Item * cond
The condition that may be pushed as a sargable predicate.
Definition: make_join_hypergraph.h:83
Represents an expression tree in the relational algebra of joins.
Definition: relational_expression.h:155
A sargable (from “Search ARGument”) predicate is one that we can attempt to push down into an index (...
Definition: make_join_hypergraph.h:63
int predicate_index
Definition: make_join_hypergraph.h:65
Field * field
Definition: make_join_hypergraph.h:70
Item * other_side
Definition: make_join_hypergraph.h:71
bool can_evaluate
True if it is safe to evaluate "other_side" during optimization.
Definition: make_join_hypergraph.h:76
Definition: table.h:1450
Definition: hypergraph.h:92