MySQL 9.1.0
Source Code Documentation
|
#include "sql/join_optimizer/join_optimizer.h"
#include <assert.h>
#include <float.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include <bit>
#include <bitset>
#include <cmath>
#include <initializer_list>
#include <iterator>
#include <limits>
#include <ostream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
#include "ft_global.h"
#include "map_helpers.h"
#include "mem_root_deque.h"
#include "my_alloc.h"
#include "my_base.h"
#include "my_bitmap.h"
#include "my_dbug.h"
#include "my_inttypes.h"
#include "my_sqlcommand.h"
#include "my_sys.h"
#include "my_table_map.h"
#include "mysql/components/services/bits/psi_bits.h"
#include "mysql/udf_registration_types.h"
#include "mysqld_error.h"
#include "prealloced_array.h"
#include "scope_guard.h"
#include "sql/field.h"
#include "sql/filesort.h"
#include "sql/handler.h"
#include "sql/item.h"
#include "sql/item_cmpfunc.h"
#include "sql/item_func.h"
#include "sql/item_sum.h"
#include "sql/join_optimizer/access_path.h"
#include "sql/join_optimizer/bit_utils.h"
#include "sql/join_optimizer/build_interesting_orders.h"
#include "sql/join_optimizer/compare_access_paths.h"
#include "sql/join_optimizer/cost_model.h"
#include "sql/join_optimizer/derived_keys.h"
#include "sql/join_optimizer/estimate_selectivity.h"
#include "sql/join_optimizer/explain_access_path.h"
#include "sql/join_optimizer/find_contained_subqueries.h"
#include "sql/join_optimizer/graph_simplification.h"
#include "sql/join_optimizer/hypergraph.h"
#include "sql/join_optimizer/interesting_orders.h"
#include "sql/join_optimizer/interesting_orders_defs.h"
#include "sql/join_optimizer/make_join_hypergraph.h"
#include "sql/join_optimizer/node_map.h"
#include "sql/join_optimizer/optimizer_trace.h"
#include "sql/join_optimizer/overflow_bitset.h"
#include "sql/join_optimizer/print_utils.h"
#include "sql/join_optimizer/relational_expression.h"
#include "sql/join_optimizer/secondary_engine_costing_flags.h"
#include "sql/join_optimizer/subgraph_enumeration.h"
#include "sql/join_optimizer/walk_access_paths.h"
#include "sql/join_type.h"
#include "sql/key.h"
#include "sql/key_spec.h"
#include "sql/mem_root_array.h"
#include "sql/olap.h"
#include "sql/opt_costmodel.h"
#include "sql/opt_hints.h"
#include "sql/parse_tree_node_base.h"
#include "sql/partition_info.h"
#include "sql/query_options.h"
#include "sql/range_optimizer/group_index_skip_scan_plan.h"
#include "sql/range_optimizer/index_range_scan_plan.h"
#include "sql/range_optimizer/index_skip_scan_plan.h"
#include "sql/range_optimizer/internal.h"
#include "sql/range_optimizer/path_helpers.h"
#include "sql/range_optimizer/range_analysis.h"
#include "sql/range_optimizer/range_opt_param.h"
#include "sql/range_optimizer/range_optimizer.h"
#include "sql/range_optimizer/rowid_ordered_retrieval_plan.h"
#include "sql/range_optimizer/tree.h"
#include "sql/sql_array.h"
#include "sql/sql_base.h"
#include "sql/sql_class.h"
#include "sql/sql_cmd.h"
#include "sql/sql_const.h"
#include "sql/sql_executor.h"
#include "sql/sql_lex.h"
#include "sql/sql_list.h"
#include "sql/sql_opt_exec_shared.h"
#include "sql/sql_optimizer.h"
#include "sql/sql_partition.h"
#include "sql/sql_select.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/table_function.h"
#include "sql/uniques.h"
#include "sql/window.h"
#include "template_utils.h"
Namespaces | |
namespace | anonymous_namespace{join_optimizer.cc} |
Typedefs | |
using | anonymous_namespace{join_optimizer.cc}::AccessPathArray = Prealloced_array< AccessPath *, 4 > |
Functions | |
string | anonymous_namespace{join_optimizer.cc}::PrintAccessPath (const AccessPath &path, const JoinHypergraph &graph, const char *description_for_trace) |
void | anonymous_namespace{join_optimizer.cc}::PrintJoinOrder (const AccessPath *path, string *join_order) |
Used by optimizer trace to print join order of join paths. More... | |
AccessPath * | anonymous_namespace{join_optimizer.cc}::CreateMaterializationPath (THD *thd, JOIN *join, AccessPath *path, TABLE *temp_table, Temp_table_param *temp_table_param, bool copy_items, double *distinct_rows, MaterializePathParameters::DedupType dedup_reason) |
Sets up an access path for materializing the results returned from a path in a temporary table. More... | |
AccessPath * | anonymous_namespace{join_optimizer.cc}::GetSafePathToSort (THD *thd, JOIN *join, AccessPath *path, bool need_rowid, bool force_materialization=false) |
int | anonymous_namespace{join_optimizer.cc}::WasPushedDownToRef (Item *condition, const KeypartForRef *keyparts, unsigned num_keyparts) |
SecondaryEngineFlags | anonymous_namespace{join_optimizer.cc}::EngineFlags (const THD *thd) |
Lists the current secondary engine flags in use. More... | |
secondary_engine_modify_access_path_cost_t | anonymous_namespace{join_optimizer.cc}::SecondaryEngineCostHook (const THD *thd) |
Gets the secondary storage engine cost modification function, if any. More... | |
secondary_engine_check_optimizer_request_t | anonymous_namespace{join_optimizer.cc}::SecondaryEngineStateCheckHook (const THD *thd) |
Gets the secondary storage engine hypergraph state hook function, if any. More... | |
Item_func_match * | anonymous_namespace{join_optimizer.cc}::GetSargableFullTextPredicate (const Predicate &predicate) |
Returns the MATCH function of a predicate that can be pushed down to a full-text index. More... | |
bool | anonymous_namespace{join_optimizer.cc}::IsDeleteStatement (const THD *thd) |
Is the current statement a DELETE statement? More... | |
bool | anonymous_namespace{join_optimizer.cc}::IsUpdateStatement (const THD *thd) |
Is the current statement a DELETE statement? More... | |
void | anonymous_namespace{join_optimizer.cc}::SetNumOutputRowsAfterFilter (AccessPath *path, double output_rows) |
Set the number of output rows after filter for an access path to a new value. More... | |
bool | anonymous_namespace{join_optimizer.cc}::CheckKilledOrError (THD *thd) |
Check if the statement is killed or an error has been raised. More... | |
void | anonymous_namespace{join_optimizer.cc}::FindAppliedAndSubsumedPredicatesForRangeScan (THD *thd, KEY *key, unsigned used_key_parts, unsigned num_exact_key_parts, TABLE *table, OverflowBitset tree_applied_predicates, OverflowBitset tree_subsumed_predicates, const JoinHypergraph &graph, OverflowBitset *applied_predicates_out, OverflowBitset *subsumed_predicates_out) |
bool | anonymous_namespace{join_optimizer.cc}::CollectPossibleRangeScans (THD *thd, SEL_TREE *tree, RANGE_OPT_PARAM *param, OverflowBitset tree_applied_predicates, OverflowBitset tree_subsumed_predicates, const JoinHypergraph &graph, Mem_root_array< PossibleRangeScan > *possible_scans) |
double | anonymous_namespace{join_optimizer.cc}::EstimateOutputRowsFromRangeTree (THD *thd, const RANGE_OPT_PARAM ¶m, ha_rows total_rows, const Mem_root_array< PossibleRangeScan > &possible_scans, const JoinHypergraph &graph, OverflowBitset predicates) |
Based on estimates for all the different range scans (which cover different but potentially overlapping combinations of predicates), try to find an estimate for the number of rows scanning the given table, with all predicates applied. More... | |
AccessPath * | anonymous_namespace{join_optimizer.cc}::FindCheapestIndexRangeScan (THD *thd, SEL_TREE *tree, RANGE_OPT_PARAM *param, bool prefer_clustered_primary_key_scan, bool *inexact, bool need_rowid_ordered_rows) |
From a collection of index scans, find the single cheapest one and generate an AccessPath for it. More... | |
void | anonymous_namespace{join_optimizer.cc}::UpdateAppliedAndSubsumedPredicates (const uint idx, const Mem_root_array< PossibleRORScan > &possible_ror_scans, const RANGE_OPT_PARAM *param, OverflowBitset *applied_predicates, OverflowBitset *subsumed_predicates) |
int | anonymous_namespace{join_optimizer.cc}::GetRowIdOrdering (const TABLE *table, const LogicalOrderings *orderings, const Mem_root_array< ActiveIndexInfo > *active_indexes) |
bool | anonymous_namespace{join_optimizer.cc}::ContainsSubqueries (Item *item_arg) |
bool | anonymous_namespace{join_optimizer.cc}::HasConstantEqualityForField (const Mem_root_array< SargablePredicate > &sargable_predicates, const Field *field) |
Do we have a sargable predicate which checks if "field" is equal to a constant? More... | |
bool | anonymous_namespace{join_optimizer.cc}::IsSubsumableFullTextPredicate (Item_func *condition) |
bool | anonymous_namespace{join_optimizer.cc}::IsLimitHintPushableToFullTextSearch (const Item_func_match *match, const JoinHypergraph &graph, uint64_t fulltext_predicates) |
bool | anonymous_namespace{join_optimizer.cc}::LateralDependenciesAreSatisfied (int node_idx, NodeMap tables, const JoinHypergraph &graph) |
Checks if the table given by "node_idx" has all its lateral dependencies satisfied by the set of tables given by "tables". More... | |
NodeMap | anonymous_namespace{join_optimizer.cc}::FindReachableTablesFrom (NodeMap tables, const JoinHypergraph &graph) |
Find the set of tables we can join directly against, given that we have the given set of tables on one of the sides (effectively the same concept as DPhyp's “neighborhood”). More... | |
bool | anonymous_namespace{join_optimizer.cc}::CanResolveMoreParameterTables (NodeMap outer, NodeMap inner, NodeMap outer_parameters, NodeMap inner_parameters, NodeMap outer_reachable, NodeMap inner_reachable) |
Is it possible to resolve more parameter tables before performing a nested loop join between "outer" and "inner", or will the join have to be performed first? More... | |
bool | anonymous_namespace{join_optimizer.cc}::DisallowParameterizedJoinPath (AccessPath *left_path, AccessPath *right_path, NodeMap left, NodeMap right, NodeMap left_reachable, NodeMap right_reachable, bool is_reorderable) |
Decide whether joining the two given paths would create a disallowed parameterized path. More... | |
bool | anonymous_namespace{join_optimizer.cc}::IsEmptyJoin (const RelationalExpression::Type join_type, bool left_is_empty, bool right_is_empty) |
Checks if the result of a join is empty, given that it is known that one or both of the join legs always produces an empty result. More... | |
void | anonymous_namespace{join_optimizer.cc}::MoveDegenerateJoinConditionToFilter (THD *thd, Query_block *query_block, const JoinPredicate **edge, AccessPath **right_path) |
If the ON clause of a left join only references tables on the right side of the join, pushing the condition into the right side is a valid thing to do. More... | |
AccessPath * | anonymous_namespace{join_optimizer.cc}::DeduplicateForSemijoin (THD *thd, AccessPath *path, Item **semijoin_group, int semijoin_group_size, RelationalExpression *expr) |
Build an access path that deduplicates its input on a certain grouping. More... | |
bool | anonymous_namespace{join_optimizer.cc}::IsConstantSingleRowPath (const AccessPath &path) |
Check if an access path returns at most one row, and it's constant throughout the query. More... | |
uint32_t | anonymous_namespace{join_optimizer.cc}::AddFlag (uint32_t flags, FuzzyComparisonResult flag) |
bool | anonymous_namespace{join_optimizer.cc}::HasFlag (uint32_t flags, FuzzyComparisonResult flag) |
PathComparisonResult | CompareAccessPaths (const LogicalOrderings &orderings, const AccessPath &a, const AccessPath &b, OrderingSet obsolete_orderings) |
bool | anonymous_namespace{join_optimizer.cc}::IsMaterializationPath (const AccessPath *path) |
AccessPath | anonymous_namespace{join_optimizer.cc}::MakeSortPathWithoutFilesort (THD *thd, AccessPath *child, ORDER *order, int ordering_state, int num_where_predicates) |
bool | anonymous_namespace{join_optimizer.cc}::CheckSupportedQuery (THD *thd) |
AccessPath * | anonymous_namespace{join_optimizer.cc}::CreateMaterializationOrStreamingPath (THD *thd, JOIN *join, AccessPath *path, bool need_rowid, bool copy_items) |
Set up an access path for streaming or materializing through a temporary table. More... | |
bool | anonymous_namespace{join_optimizer.cc}::IsImmediateDeleteCandidate (const Table_ref *table_ref, const Query_block *query_block) |
Is this DELETE target table a candidate for being deleted from immediately, while scanning the result of the join? It only checks if it is a candidate for immediate delete. More... | |
void | anonymous_namespace{join_optimizer.cc}::AddFieldsToTmpSet (Item *item, TABLE *table) |
Adds all fields of "table" that are referenced from "item" to table->tmp_set. More... | |
bool | anonymous_namespace{join_optimizer.cc}::IsImmediateUpdateCandidate (const Table_ref *table_ref, int node_idx, const JoinHypergraph &graph, table_map target_tables) |
Is this UPDATE target table a candidate for being updated immediately, while scanning the result of the join? It only checks if it is a candidate for immediate update. More... | |
table_map | anonymous_namespace{join_optimizer.cc}::FindUpdateDeleteTargetTables (const Query_block *query_block) |
Finds all the target tables of an UPDATE or DELETE statement. More... | |
table_map | anonymous_namespace{join_optimizer.cc}::FindImmediateUpdateDeleteCandidates (const JoinHypergraph &graph, table_map target_tables, bool is_delete) |
Finds all of the target tables of an UPDATE or DELETE statement that are candidates from being updated or deleted from immediately while scanning the results of the join, without need to buffer the row IDs in a temporary table for delayed update/delete after the join has completed. More... | |
NodeMap | anonymous_namespace{join_optimizer.cc}::FindFullTextSearchedTables (const JoinHypergraph &graph) |
bool | anonymous_namespace{join_optimizer.cc}::IsSargableFullTextIndexPredicate (Item *condition) |
uint64_t | anonymous_namespace{join_optimizer.cc}::FindSargableFullTextPredicates (const JoinHypergraph &graph) |
bool | anonymous_namespace{join_optimizer.cc}::InjectCastNodes (JoinHypergraph *graph) |
void | anonymous_namespace{join_optimizer.cc}::EnableFullTextCoveringIndexes (const Query_block *query_block) |
AccessPath * | anonymous_namespace{join_optimizer.cc}::CreateZeroRowsForEmptyJoin (JOIN *join, const char *cause) |
Creates a ZERO_ROWS access path for an always empty join result, or a ZERO_ROWS_AGGREGATED in case of an implicitly grouped query. More... | |
AccessPath | anonymous_namespace{join_optimizer.cc}::CreateStreamingAggregationPath (THD *thd, AccessPath *path, JOIN *join, olap_type olap, double row_estimate) |
Creates an AGGREGATE AccessPath, possibly with an intermediary STREAM node if one is needed. More... | |
bool | anonymous_namespace{join_optimizer.cc}::IsFinalPredicate (const Predicate &predicate) |
Check if a predicate in the WHERE clause should be applied after all tables have been joined together. More... | |
bool | anonymous_namespace{join_optimizer.cc}::SkipFinalPredicates (const AccessPathArray &candidates, const JoinHypergraph &graph) |
Can we skip the ApplyFinalPredicatesAndExpandFilters() step? More... | |
void | anonymous_namespace{join_optimizer.cc}::ApplyFinalPredicatesAndExpandFilters (THD *thd, const CostingReceiver &receiver, const JoinHypergraph &graph, const LogicalOrderings &orderings, FunctionalDependencySet *fd_set, AccessPathArray *root_candidates) |
static AccessPath * | anonymous_namespace{join_optimizer.cc}::CreateTemptableAggregationPath (THD *thd, Query_block *query_block, AccessPath *child_path, double *aggregate_rows) |
void | anonymous_namespace{join_optimizer.cc}::SplitHavingCondition (THD *thd, Item *cond, Item **having_cond, Item **having_cond_wf) |
void | anonymous_namespace{join_optimizer.cc}::ApplyHavingOrQualifyCondition (THD *thd, Item *having_cond, Query_block *query_block, const char *description_for_trace, AccessPathArray *root_candidates, CostingReceiver *receiver) |
JoinHypergraph::Node * | anonymous_namespace{join_optimizer.cc}::FindNodeWithTable (JoinHypergraph *graph, TABLE *table) |
bool | anonymous_namespace{join_optimizer.cc}::ForceMaterializationBeforeSort (const Query_block &query_block, bool need_rowid) |
If we have both ORDER BY and GROUP BY, we need a materialization step after the grouping (if windowing hasn't already given us one) – although in most cases, we only need to materialize one row at a time (streaming), so the performance loss should be very slight. More... | |
void | anonymous_namespace{join_optimizer.cc}::SetGroupSkipScanCardinality (AccessPath *path, double output_rows) |
Set the estimated number of output rows for a group skip scan to match the estimate calculated by EstimateDistinctRows() or EstimateAggregateRows(). More... | |
bool | anonymous_namespace{join_optimizer.cc}::ObeysIndexOrderHints (AccessPath *root_path, JOIN *join, bool grouping) |
AccessPathArray | anonymous_namespace{join_optimizer.cc}::ApplyOrderBy (THD *thd, const CostingReceiver &receiver, const LogicalOrderings &orderings, int order_by_ordering_idx, const Query_block &query_block, bool need_rowid, bool force_sort_rowids, const AccessPathArray &root_candidates) |
Apply the ORDER BY clause. More... | |
static AccessPath * | anonymous_namespace{join_optimizer.cc}::ApplyWindow (THD *thd, AccessPath *root_path, Window *window, JOIN *join, bool need_rowid_for_window) |
static int | anonymous_namespace{join_optimizer.cc}::FindBestOrderingForWindow (JOIN *join, const LogicalOrderings &orderings, FunctionalDependencySet fd_set, const Mem_root_array< SortAheadOrdering > &sort_ahead_orderings, Bounds_checked_array< bool > finished_windows, Bounds_checked_array< bool > tmp_buffer, int first_ordering_idx, int second_ordering_idx, Bounds_checked_array< bool > included_windows) |
Find the ordering that allows us to process the most unprocessed windows. More... | |
AccessPath * | anonymous_namespace{join_optimizer.cc}::MakeSortPathAndApplyWindows (THD *thd, JOIN *join, AccessPath *root_path, int ordering_idx, ORDER *order, const LogicalOrderings &orderings, Bounds_checked_array< bool > windows_this_iteration, FunctionalDependencySet fd_set, int num_where_predicates, bool need_rowid_for_window, int single_window_idx, Bounds_checked_array< bool > finished_windows, int *num_windows_left) |
bool | anonymous_namespace{join_optimizer.cc}::CheckFoundPlan (THD *thd, const AccessPathArray &candidates, bool is_secondary_engine) |
Check if at least one candidate for a valid query plan was found. More... | |
static AccessPathArray | ApplyWindowFunctions (THD *thd, const CostingReceiver &receiver, const LogicalOrderings &orderings, FunctionalDependencySet fd_set, bool aggregation_is_unordered, int order_by_ordering_idx, int distinct_ordering_idx, const JoinHypergraph &graph, const Mem_root_array< SortAheadOrdering > &sort_ahead_orderings, Query_block *query_block, int num_where_predicates, bool need_rowid, AccessPathArray root_candidates) |
Apply window functions. More... | |
static bool | CompatibleTypesForIndexLookup (Item_func_eq *eq_item, Field *field, Item *value) |
Find out if "value" has a type which is compatible with "field" so that it can be used for an index lookup if there is an index on "field". More... | |
static void | PossiblyAddSargableCondition (THD *thd, Item *item, const CompanionSet &companion_set, TABLE *force_table, int predicate_index, bool is_join_condition, JoinHypergraph *graph) |
Find out whether “item” is a sargable condition; if so, add it to: More... | |
void | FindSargablePredicates (THD *thd, JoinHypergraph *graph) |
static bool | ComesFromSameMultiEquality (Item *cond1, Item_eq_base *cond2) |
static void | CacheCostInfoForJoinConditions (THD *thd, const Query_block *query_block, JoinHypergraph *graph) |
For each edge, cache some information for each of its join conditions. More... | |
static bool | IsAlreadyAggregated (const AccessPath *root_path) |
bool | ApplyAggregation (THD *thd, JoinHypergraph *graph, CostingReceiver &receiver, int group_by_ordering_idx, bool need_rowid, bool aggregation_is_unordered, const LogicalOrderings &orderings, const Mem_root_array< SortAheadOrdering > &sort_ahead_orderings, FunctionalDependencySet fd_set, Query_block *query_block, AccessPathArray &root_candidates) |
static AccessPath * | FindBestQueryPlanInner (THD *thd, Query_block *query_block, bool *retry, int *subgraph_pair_limit) |
Find the lowest-cost plan (which hopefully is also the cheapest to execute) of all the legal ways to execute the query. More... | |
AccessPath * | FindBestQueryPlan (THD *thd, Query_block *query_block) |
The main entry point for the hypergraph join optimizer; takes in a query block and returns an access path to execute it (or nullptr, for error). More... | |
bool ApplyAggregation | ( | THD * | thd, |
JoinHypergraph * | graph, | ||
CostingReceiver & | receiver, | ||
int | group_by_ordering_idx, | ||
bool | need_rowid, | ||
bool | aggregation_is_unordered, | ||
const LogicalOrderings & | orderings, | ||
const Mem_root_array< SortAheadOrdering > & | sort_ahead_orderings, | ||
FunctionalDependencySet | fd_set, | ||
Query_block * | query_block, | ||
AccessPathArray & | root_candidates | ||
) |
|
static |
Apply window functions.
Ordering of window functions is a tricky topic. We can apply window functions in any order that we'd like, but we would like to do as few sorts as possible. In its most general form, this would entail solving an instance of the traveling salesman problem (TSP), and although the number of windows is typically small (one or two in most queries), this can blow up for large numbers of windows.
Thankfully, window functions never add or remove rows. We also assume that all sorts are equally expensive (which isn't really true, as ones on more columns take more processing time and buffer, but it's close enough in practice), and we also ignore the fact that as we compute more buffers, the temporary tables and sort buffers will get more columns. These assumptions, combined with some reasonable assumptions about ordering transitivity (if an ordering A is more sorted than an ordering B, and B > C, then also A > C – the only thing that can disturb this is groupings, which we ignore for the sake of simplicity), mean that we need to care only about the number of sorts, and can do them greedily. Thus, at any point, we pick the ordering that allows us to process the largest number of windows, process them, remove them from consideration, and repeat until there are none left.
There is one more ordering complication; after windowing, we may have DISTINCT and/or ORDER BY, which may also benefit from groupings/orderings we leave after the last window. Thus, first of all, we see if there's an ordering that can satisfy them (ideally both if possible) and at least one window; if so, we save that ordering and those windows for last.
Temporary tables are set up in FinalizePlanForQueryBlock(). This is so that it is easier to have multiple different orderings for the temporary table parameters later.
|
static |
For each edge, cache some information for each of its join conditions.
This reduces work when repeatedly applying these join conditions later on. In particular, FindContainedSubqueries() contains a large amount of virtual function calls that we would like to avoid doing every time we consider a given join.
|
static |
PathComparisonResult CompareAccessPaths | ( | const LogicalOrderings & | orderings, |
const AccessPath & | a, | ||
const AccessPath & | b, | ||
OrderingSet | obsolete_orderings | ||
) |
|
static |
Find out if "value" has a type which is compatible with "field" so that it can be used for an index lookup if there is an index on "field".
AccessPath * FindBestQueryPlan | ( | THD * | thd, |
Query_block * | query_block | ||
) |
The main entry point for the hypergraph join optimizer; takes in a query block and returns an access path to execute it (or nullptr, for error).
It works as follows:
Materializing subqueries need some extra care. (These are typically IN subqueries that for whatever reason could not be rewritten to semijoin, e.g. because they have GROUP BY.) The decision on whether to materialize or not needs to be done cost-based, and depends both on the inner and outer query block, so it needs to be done cost-based. (Materializiation gives a high up-front cost, but each execution is cheaper, so it will depend on how many times we expect to execute the subquery and now expensive it is to run unmaterialized.) Following the flow through the different steps:
First of all, these go through a stage known as in2exists, rewriting them from e.g.
WHERE t1_outer.x IN ( SELECT t2.y FROM t2 GROUP BY ... )
to
WHERE EXISTS ( SELECT 1 FROM t2 GROUP BY ... HAVING t2.y = t1_outer.x )
This happens before the join optimizer, and the idea is that the HAVING condition (known as a “created_by_in2exists condition”, possibly in WHERE instead of HAVING) can be attempted pushed down into an index or similar, giving more efficient execution. However, if we want to materialize the subquery, these extra conditions need to be removed before materialization; not only do they give the wrong result, but they can also need to wrong costs and a suboptimal join order.
Thus, whenever we plan such a subquery, we plan it twice; once as usual, and then a second time with all in2exists conditions removed. This gives EstimateFilterCost() precise cost information for both cases, or at least as precise as the cost model itself is. In the outer query block, we can then weigh the two alternatives against each other when we add a filter with such a subquery; we can choose to materialize it or not, and propose both alternatives as with any other subplan. When we've decided on the final plan, we go through all access paths and actually materialize the subqueries it says to materialize.
There are lots of places these conditions can show up; to reduce complexity, we only consider materialization in the most common places (filters on base tables, filters after joins, filters from HAVING) – in particular, we don't bother checking on join conditions. It is never wrong to not materialize a subquery, though it may be suboptimal.
Note that the access path returned by FindBestQueryPlan() is not ready for immediate conversion to iterators; see FinalizePlanForQueryBlock(). You may call FindBestQueryPlan() any number of times for a query block, but FinalizePlanForQueryBlock() only once, as finalization generates temporary tables and may rewrite expressions in ways that are incompatible with future planning. The difference is most striking with the planning done twice by in2exists (see above).
thd | Thread handle. |
query_block | The query block to find a plan for. |
|
static |
Find the lowest-cost plan (which hopefully is also the cheapest to execute) of all the legal ways to execute the query.
The overall order of operations is largely dictated by the standard:
The place where we have the most leeway by far is #1, which is why this part of the optimizer is generally called the join optimizer (there are potentially billions of different join orderings, whereas each of the other steps, except windowing, can only be done in one or two ways). But the various outputs of #1 can have different properties, that can make for higher or lower costs in the other steps. (For instance, LIMIT will affect candidates with different init_cost differently, and ordering properties can skip sorting in ORDER BY entirely.) Thus, we allow keeping multiple candidates in play at every step if they are meaningfully different, and only pick out the winning candidate based on cost at the very end.
thd | Thread handle. | |
query_block | The query block to find a plan for. | |
[out] | retry | Gets set to true before returning if the caller should retry the call to this function. |
[in,out] | subgraph_pair_limit | The maximum number of subgraph pairs to inspect before invoking the graph simplifier. Also returns the subgraph pair limit to use in the next invocation if "retry" returns true. |
ensure full enumeration is done for primary engine.
void FindSargablePredicates | ( | THD * | thd, |
JoinHypergraph * | graph | ||
) |
|
static |
|
static |
Find out whether “item” is a sargable condition; if so, add it to: