MySQL 9.5.0
Source Code Documentation
make_join_hypergraph.h
Go to the documentation of this file.
1/* Copyright (c) 2020, 2025, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24#ifndef SQL_JOIN_OPTIMIZER_MAKE_JOIN_HYPERGRAPH
25#define SQL_JOIN_OPTIMIZER_MAKE_JOIN_HYPERGRAPH 1
26
27#include <assert.h>
28
29#include <algorithm>
30#include <array>
31#include <cstddef>
32#include <optional>
33#include <span>
34#include <string>
35#include <unordered_map>
36#include <utility>
37
38#include "map_helpers.h"
39#include "my_table_map.h"
40#include "sql/item.h"
46#include "sql/mem_root_array.h"
47#include "sql/sql_const.h"
48
49class Field;
50class JOIN;
51class Query_block;
52class THD;
53struct MEM_ROOT;
55struct TABLE;
56
57/**
58 A sargable (from “Search ARGument”) predicate is one that we can attempt
59 to push down into an index (what we'd call “ref access” or “index range
60 scan”/“quick”). This structure denotes one such instance, precomputed from
61 all the predicates in the given hypergraph.
62 */
64 // Index into the “predicates” array in the graph.
66
67 // The predicate is assumed to be <field> = <other_side>.
68 // Later, we could push down other kinds of relations, such as
69 // greater-than.
72
73 /// True if it is safe to evaluate "other_side" during optimization. It must
74 /// be constant during execution. Also, it should not contain subqueries or
75 /// stored procedures, which we do not want to execute during optimization.
77};
78
79/// Information about a join condition that can potentially be pushed down as a
80/// sargable predicate for a Node.
82 /// The condition that may be pushed as a sargable predicate.
84 /// The relational expression from which this condition is pushable.
86};
87
88/**
89 A struct containing a join hypergraph of a single query block, encapsulating
90 the constraints given by the relational expressions (e.g. inner joins are
91 more freely reorderable than outer joins).
92
93 Since the Hypergraph class does not carry any payloads for nodes and edges,
94 and we need to associate e.g. TABLE pointers with each node, we store our
95 extra data in “nodes” and “edges”, indexed the same way the hypergraph is
96 indexed.
97 */
100 : graph(mem_root),
106
108
109 /// Flags set when AccessPaths are proposed to secondary engines for costing.
110 /// The intention of these flags is to avoid traversing the AccessPath tree to
111 /// check for certain criteria.
112 /// TODO (tikoldit) Move to JOIN or Secondary_engine_execution_context, so
113 /// that JoinHypergraph can be immutable during planning
115
116 // Maps table->tableno() to an index in “nodes”, also suitable for
117 // a bit index in a NodeMap. This is normally the identity mapping,
118 // except for when scalar-to-derived conversion is active.
119 std::array<int, MAX_TABLES> table_num_to_node_num;
120
121 class Node final {
122 public:
124 : m_table{table},
127 assert(mem_root != nullptr);
128 assert(table != nullptr);
129 }
130
131 TABLE *table() const { return m_table; }
132
133 void AddSargable(const SargablePredicate &predicate) {
134 assert(predicate.predicate_index >= 0);
135 assert(predicate.field != nullptr);
136 assert(predicate.other_side != nullptr);
137 m_sargable_predicates.push_back(predicate);
138 }
139
142 }
143
144 /**
145 Add a join condition that is potentially pushable as a sargable predicate
146 for this node.
147
148 @param cond The condition to be considered for pushdown.
149 @param from The relational expression from which this condition
150 originates.
151 */
152 void AddPushable(Item *cond, const RelationalExpression *from) {
153 // Don't add duplicate conditions, as this causes their selectivity to
154 // be applied multiple times, giving poor row estimates (cf.
155 // bug#36135001).
156 if (std::ranges::none_of(m_pushable_conditions,
157 [&](const PushableJoinCondition &other) {
158 return ItemsAreEqual(cond, other.cond);
159 })) {
160 m_pushable_conditions.push_back({.cond = cond, .from = from});
161 }
162 }
163
166 }
167
170 }
171
173 m_lateral_dependencies = dependencies;
174 }
175
176 /* Estimated rows cardinality after table filters. */
177 std::optional<double> cardinality;
178
179 private:
181 // List of all sargable predicates (see SargablePredicate) where
182 // the field is part of this table. When we see the node for
183 // the first time, we will evaluate all of these and consider
184 // creating access paths that exploit these predicates.
186
187 // Join conditions that are potentially pushable to this node
188 // as sargable predicates (if they are sargable, they will be
189 // added to sargable_predicates below, together with sargable
190 // non-join conditions). This is a verbatim copy of
191 // the m_pushable_conditions member in RelationalExpression,
192 // which is computed as a side effect during join pushdown.
193 // (We could in principle have gone and collected all join conditions
194 // ourselves when determining sargable conditions, but there would be
195 // a fair amount of duplicated code in determining pushability,
196 // which is why regular join pushdown does the computation.)
198
199 // The lateral dependencies of this table. That is, the set of tables that
200 // must be available on the outer side of a nested loop join in which this
201 // table is on the inner side. This map may be set for LATERAL derived
202 // tables and derived tables with outer references, and for table functions.
204 };
206
207 // Note that graph.edges contain each edge twice (see Hypergraph
208 // for more information), so edges[i] corresponds to graph.edges[i*2].
210
211 // The first <num_where_predicates> are filter predicates. These are the
212 // predicates that may be added as filters on nodes in the join tree by
213 // setting the corresponding bit in AccessPath::filter_predicates, which at
214 // the end of join optimization gets expanded to proper FILTER access paths by
215 // ExpandFilterAccessPaths(). Despite the name "num_where_predicates", they
216 // are not necessarily WHERE predicates. They include:
217 //
218 // - Actual WHERE predicates that could not be pushed down into one of the
219 // join conditions.
220 //
221 // - Predicates that could be pushed all the way down and become a table
222 // filter. These could be WHERE predicates, but they could also be
223 // predicates that are possible to push all the way down, but not possible
224 // to pull all the way up. Take for example "SELECT 1 FROM t1 LEFT JOIN t2
225 // ON t1.a=t2.a AND t2.b=1". The t2.b=1 predicate can be pushed down as a
226 // table filter, but it cannot be used as a WHERE predicate, as it would
227 // incorrectly filter out the NULL-complemented rows. Still, such table
228 // filters are also counted in "num_where_predicates".
229 //
230 // - Predicates that are join conditions in some inner join that is involved
231 // in a cycle in the join hypergraph. These are applied as filters in the
232 // join tree if the tables are joined via another edge in the cycle. Such
233 // predicates are also not necessarily possible to pull up to the WHERE
234 // clause. If they for example came from an inner join on the inner side of
235 // some outer join, they cannot be applied as WHERE predicates. Even so,
236 // they are still counted in "num_where_predicates".
237 //
238 // The rest are sargable join predicates. The latter are in the array
239 // solely so they can be part of the regular “applied_filters” bitmap
240 // if they are pushed down into an index, so that we know that we
241 // don't need to apply them as join conditions later.
242 //
243 // If a sargable join predicate comes from a join that is part of a cycle in
244 // the hypergraph, it could be present in both partitions of the array.
246
247 // How many of the predicates in "predicates" are filter predicates. The rest
248 // of them are sargable join predicates.
250
251 /// Returns an immutable view of the filter predicates portion of the
252 /// predicates array.
253 std::span<const Predicate> filter_predicates() const {
254 return {predicates.data(), num_where_predicates};
255 }
256
257 /// Returns a mutable view of the filter predicates portion of the predicates
258 /// array.
259 std::span<Predicate> filter_predicates() {
260 return {predicates.data(), num_where_predicates};
261 }
262
263 // A bitmap over predicates that are, or contain, at least one
264 // materializable subquery.
266
267 /// Returns a pointer to the query block that is being planned.
268 const Query_block *query_block() const { return m_query_block; }
269
270 /// Returns a pointer to the JOIN object of the query block being planned.
271 const JOIN *join() const;
272
273 /// Whether, at any point, we could rewrite (t1 LEFT JOIN t2) LEFT JOIN t3
274 /// to t1 LEFT JOIN (t2 LEFT JOIN t3) or vice versa. We record this purely to
275 /// note that we have a known bug/inconsistency in row count estimation
276 /// in this case. Bug #33550360 has a test case, but to sum up:
277 /// Assume t1 and t3 has 25 rows, but t2 has zero rows, and selectivities
278 /// are 0.1. As long as we clamp the row count in FindOutputRowsForJoin(),
279 /// and do not modify these selectivities somehow, the former would give
280 /// 62.5 rows, and the second would give 25 rows. This should be fixed
281 /// eventually, but for now, at least we register it, so that we do not
282 /// assert-fail on inconsistent row counts if this (known) issue could be
283 /// the root cause.
285
286 // True if estimates for one or more Nodes in the graph have been provided
287 // by the secondary engine (i.e. at least one Node has the field `cardinality`
288 // set).
290
291 /// The set of nodes that are on the inner side of some outer join.
293
294 /// The set of nodes that are on the inner side of some semijoin.
296
297 /// The set of nodes that are on the inner side of some antijoin.
299
300 /// The set of nodes that are represented by table_function. This will be set
301 /// only when optimizing for secondary engine.
303
304 int FindSargableJoinPredicate(const Item *predicate) const {
305 const auto iter = m_sargable_join_predicates.find(predicate);
306 return iter == m_sargable_join_predicates.cend() ? -1 : iter->second;
307 }
308
309 void AddSargableJoinPredicate(const Item *predicate, int position) {
310 m_sargable_join_predicates.emplace(predicate, position);
311 }
312
313 private:
314 // For each sargable join condition, maps into its index in “predicates”.
315 // We need the predicate index when applying the join to figure out whether
316 // we have already applied the predicate or not; see
317 // {applied,subsumed}_sargable_join_predicates in AccessPath.
319
320 /// A pointer to the query block being planned.
322};
323
324/**
325 Make a join hypergraph from the query block given by “graph->query_block”,
326 converting from MySQL's join list structures to the ones expected
327 by the hypergraph join optimizer. This includes pushdown of WHERE
328 predicates, and detection of conditions suitable for hash join.
329 However, it does not include simplification of outer to inner joins;
330 that is presumed to have happened earlier.
331
332 The result is suitable for running DPhyp (subgraph_enumeration.h)
333 to find optimal join planning.
334 */
335bool MakeJoinHypergraph(THD *thd, JoinHypergraph *graph,
336 bool *where_is_always_false);
337
338// Exposed for testing only.
340 JoinHypergraph *graph);
341
344 const std::array<int, MAX_TABLES> &table_num_to_node_num);
345
346std::string PrintDottyHypergraph(const JoinHypergraph &graph);
347
348/// Estimates the size of the hash join keys generated from the equi-join
349/// predicates in "expr".
351
353
354#endif // SQL_JOIN_OPTIMIZER_MAKE_JOIN_HYPERGRAPH
Definition: field.h:570
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:928
Definition: sql_optimizer.h:133
Definition: make_join_hypergraph.h:121
Node(MEM_ROOT *mem_root, TABLE *table)
Definition: make_join_hypergraph.h:123
void AddSargable(const SargablePredicate &predicate)
Definition: make_join_hypergraph.h:133
const Mem_root_array< SargablePredicate > & sargable_predicates() const
Definition: make_join_hypergraph.h:140
TABLE * m_table
Definition: make_join_hypergraph.h:180
Mem_root_array< SargablePredicate > m_sargable_predicates
Definition: make_join_hypergraph.h:185
hypergraph::NodeMap lateral_dependencies() const
Definition: make_join_hypergraph.h:168
hypergraph::NodeMap m_lateral_dependencies
Definition: make_join_hypergraph.h:203
Mem_root_array< PushableJoinCondition > m_pushable_conditions
Definition: make_join_hypergraph.h:197
const Mem_root_array< PushableJoinCondition > & pushable_conditions() const
Definition: make_join_hypergraph.h:164
TABLE * table() const
Definition: make_join_hypergraph.h:131
std::optional< double > cardinality
Definition: make_join_hypergraph.h:177
void AddPushable(Item *cond, const RelationalExpression *from)
Add a join condition that is potentially pushable as a sargable predicate for this node.
Definition: make_join_hypergraph.h:152
void set_lateral_dependencies(hypergraph::NodeMap dependencies)
Definition: make_join_hypergraph.h:172
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:432
Definition: overflow_bitset.h:81
This class represents a query block, aka a query specification, which is a query consisting of a SELE...
Definition: sql_lex.h:1179
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
std::unordered_map, but allocated on a MEM_ROOT.
Definition: map_helpers.h:291
static MEM_ROOT mem_root
Definition: client_plugin.cc:114
Definition of an undirected (join) hypergraph.
bool ItemsAreEqual(const Item *a, const Item *b)
Returns true iff the two items are equal, as in a->eq(b), after unwrapping refs and Item_cache object...
Definition: item.cc:11568
bool MakeJoinHypergraph(THD *thd, JoinHypergraph *graph, bool *where_is_always_false)
Make a join hypergraph from the query block given by “graph->query_block”, converting from MySQL's jo...
Definition: make_join_hypergraph.cc:3805
hypergraph::NodeMap GetNodeMapFromTableMap(table_map table_map, const std::array< int, MAX_TABLES > &table_num_to_node_num)
std::string PrintDottyHypergraph(const JoinHypergraph &graph)
For the given hypergraph, make a textual representation in the form of a dotty graph.
Definition: make_join_hypergraph.cc:2627
void MakeJoinGraphFromRelationalExpression(THD *thd, RelationalExpression *expr, JoinHypergraph *graph)
Convert a join rooted at “expr” into a join hypergraph that encapsulates the constraints given by the...
Definition: make_join_hypergraph.cc:3330
table_map GetVisibleTables(const RelationalExpression *expr)
Definition: make_join_hypergraph.cc:4092
size_t EstimateHashJoinKeyWidth(const RelationalExpression *expr)
Estimates the size of the hash join keys generated from the equi-join predicates in "expr".
Definition: make_join_hypergraph.cc:2726
uint64_t table_map
Definition: my_table_map.h:30
uint64_t NodeMap
Since our graphs can never have more than 61 tables, node sets and edge lists are implemented using 6...
Definition: node_map.h:40
OverflowBitset is a fixed-size (once allocated) bitmap that is optimized for the common case of few e...
For updating an AccessPath's costs by a secondary engine, i.e.
uint64_t SecondaryEngineCostingFlags
Definition: secondary_engine_costing_flags.h:39
File containing constants that can be used throughout the server.
A struct containing a join hypergraph of a single query block, encapsulating the constraints given by...
Definition: make_join_hypergraph.h:98
unsigned num_where_predicates
Definition: make_join_hypergraph.h:249
hypergraph::NodeMap nodes_inner_to_outer_join
The set of nodes that are on the inner side of some outer join.
Definition: make_join_hypergraph.h:292
hypergraph::NodeMap nodes_for_table_function
The set of nodes that are represented by table_function.
Definition: make_join_hypergraph.h:302
Mem_root_array< Node > nodes
Definition: make_join_hypergraph.h:205
std::span< Predicate > filter_predicates()
Returns a mutable view of the filter predicates portion of the predicates array.
Definition: make_join_hypergraph.h:259
hypergraph::NodeMap nodes_inner_to_antijoin
The set of nodes that are on the inner side of some antijoin.
Definition: make_join_hypergraph.h:298
bool has_reordered_left_joins
Whether, at any point, we could rewrite (t1 LEFT JOIN t2) LEFT JOIN t3 to t1 LEFT JOIN (t2 LEFT JOIN ...
Definition: make_join_hypergraph.h:284
void AddSargableJoinPredicate(const Item *predicate, int position)
Definition: make_join_hypergraph.h:309
hypergraph::Hypergraph graph
Definition: make_join_hypergraph.h:107
hypergraph::NodeMap nodes_inner_to_semijoin
The set of nodes that are on the inner side of some semijoin.
Definition: make_join_hypergraph.h:295
const Query_block * query_block() const
Returns a pointer to the query block that is being planned.
Definition: make_join_hypergraph.h:268
JoinHypergraph(MEM_ROOT *mem_root, const Query_block *query_block)
Definition: make_join_hypergraph.h:99
const JOIN * join() const
Returns a pointer to the JOIN object of the query block being planned.
Definition: make_join_hypergraph.cc:3803
const Query_block * m_query_block
A pointer to the query block being planned.
Definition: make_join_hypergraph.h:321
Mem_root_array< Predicate > predicates
Definition: make_join_hypergraph.h:245
int FindSargableJoinPredicate(const Item *predicate) const
Definition: make_join_hypergraph.h:304
OverflowBitset materializable_predicates
Definition: make_join_hypergraph.h:265
SecondaryEngineCostingFlags secondary_engine_costing_flags
Flags set when AccessPaths are proposed to secondary engines for costing.
Definition: make_join_hypergraph.h:114
mem_root_unordered_map< const Item *, int > m_sargable_join_predicates
Definition: make_join_hypergraph.h:318
std::array< int, MAX_TABLES > table_num_to_node_num
Definition: make_join_hypergraph.h:119
std::span< const Predicate > filter_predicates() const
Returns an immutable view of the filter predicates portion of the predicates array.
Definition: make_join_hypergraph.h:253
bool has_estimates_from_secondary_engine
Definition: make_join_hypergraph.h:289
Mem_root_array< JoinPredicate > edges
Definition: make_join_hypergraph.h:209
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:83
Information about a join condition that can potentially be pushed down as a sargable predicate for a ...
Definition: make_join_hypergraph.h:81
const RelationalExpression * from
The relational expression from which this condition is pushable.
Definition: make_join_hypergraph.h:85
Item * cond
The condition that may be pushed as a sargable predicate.
Definition: make_join_hypergraph.h:83
Represents an expression tree in the relational algebra of joins.
Definition: relational_expression.h:155
A sargable (from “Search ARGument”) predicate is one that we can attempt to push down into an index (...
Definition: make_join_hypergraph.h:63
int predicate_index
Definition: make_join_hypergraph.h:65
Field * field
Definition: make_join_hypergraph.h:70
Item * other_side
Definition: make_join_hypergraph.h:71
bool can_evaluate
True if it is safe to evaluate "other_side" during optimization.
Definition: make_join_hypergraph.h:76
Definition: table.h:1435
Definition: hypergraph.h:92