MySQL 9.1.0
Source Code Documentation
|
CostingReceiver contains the main join planning logic, selecting access paths based on cost. More...
Classes | |
struct | AccessPathSet |
Besides the access paths for a set of nodes (see m_access_paths), AccessPathSet contains information that is common between all access paths for that set. More... | |
struct | FindRangeScansResult |
Return type for FindRangeScans(). More... | |
struct | ProposeRefsResult |
Return value of ProposeRefs. More... | |
Public Member Functions | |
CostingReceiver (THD *thd, Query_block *query_block, JoinHypergraph &graph, const LogicalOrderings *orderings, const Mem_root_array< SortAheadOrdering > *sort_ahead_orderings, const Mem_root_array< ActiveIndexInfo > *active_indexes, const Mem_root_array< SpatialDistanceScanInfo > *spatial_indexes, const Mem_root_array< FullTextIndexInfo > *fulltext_searches, NodeMap fulltext_tables, uint64_t sargable_fulltext_predicates, table_map update_delete_target_tables, table_map immediate_update_delete_candidates, bool need_rowid, SecondaryEngineFlags engine_flags, int subgraph_pair_limit, secondary_engine_modify_access_path_cost_t secondary_engine_cost_hook, secondary_engine_check_optimizer_request_t secondary_engine_planning_complexity_check_hook) | |
CostingReceiver & | operator= (const CostingReceiver &)=delete |
CostingReceiver & | operator= (CostingReceiver &&)=default |
CostingReceiver (const CostingReceiver &)=delete | |
CostingReceiver (CostingReceiver &&)=default | |
bool | HasSeen (NodeMap subgraph) const |
bool | FoundSingleNode (int node_idx) |
bool | evaluate_secondary_engine_optimizer_state_request () |
bool | FoundSubgraphPair (NodeMap left, NodeMap right, int edge_idx) |
const AccessPathArray | root_candidates () |
FunctionalDependencySet | active_fds_at_root () const |
size_t | num_subplans () const |
size_t | num_access_paths () const |
int | subgraph_pair_limit () const |
bool | always_empty () const |
True if the result of the join is found to be always empty, typically because of an impossible WHERE clause. More... | |
AccessPath * | ProposeAccessPath (AccessPath *path, AccessPathArray *existing_paths, OrderingSet obsolete_orderings, const char *description_for_trace) const |
bool | HasSecondaryEngineCostHook () const |
Private Member Functions | |
string | PrintSet (NodeMap x) const |
For trace use only. More... | |
string | PrintSubgraphHeader (const JoinPredicate *edge, const AccessPath &join_path, NodeMap left, NodeMap right) const |
For trace use only. More... | |
bool | SupportedEngineFlag (SecondaryEngineFlag flag) const |
Checks whether the given engine flag is active or not. More... | |
bool | FindIndexRangeScans (int node_idx, bool *impossible, double *num_output_rows_after_filter, bool force_imerge, bool force_skip_scan, bool *found_forced_plan) |
bool | SetUpRangeScans (int node_idx, bool *impossible, double *num_output_rows_after_filter, RANGE_OPT_PARAM *param, SEL_TREE **tree, Mem_root_array< PossibleRangeScan > *possible_scans, Mem_root_array< PossibleIndexMerge > *index_merges, Mem_root_array< PossibleIndexSkipScan > *index_skip_scans, MutableOverflowBitset *all_predicates) |
void | ProposeRangeScans (int node_idx, double num_output_rows_after_filter, RANGE_OPT_PARAM *param, SEL_TREE *tree, Mem_root_array< PossibleRangeScan > *possible_scans, bool *found_range_scan) |
void | ProposeAllIndexMergeScans (int node_idx, double num_output_rows_after_filter, RANGE_OPT_PARAM *param, SEL_TREE *tree, const Mem_root_array< PossibleRangeScan > *possible_scans, const Mem_root_array< PossibleIndexMerge > *index_merges, bool *found_imerge) |
void | ProposeIndexMerge (TABLE *table, int node_idx, const SEL_IMERGE &imerge, int pred_idx, bool inexact, bool allow_clustered_primary_key_scan, int num_where_predicates, double num_output_rows_after_filter, RANGE_OPT_PARAM *param, bool *has_clustered_primary_key_scan, bool *found_imerge) |
void | ProposeRowIdOrderedUnion (TABLE *table, int node_idx, const SEL_IMERGE &imerge, int pred_idx, bool inexact, int num_where_predicates, double num_output_rows_after_filter, const RANGE_OPT_PARAM *param, const Mem_root_array< AccessPath * > &paths, bool *found_imerge) |
void | ProposeAllRowIdOrderedIntersectPlans (TABLE *table, int node_idx, SEL_TREE *tree, int num_where_predicates, const Mem_root_array< PossibleRORScan > &possible_ror_scans, double num_output_rows_after_filter, const RANGE_OPT_PARAM *param, bool *found_imerge) |
void | ProposeRowIdOrderedIntersect (TABLE *table, int node_idx, int num_where_predicates, const Mem_root_array< PossibleRORScan > &possible_ror_scans, const Mem_root_array< ROR_SCAN_INFO * > &ror_scans, ROR_SCAN_INFO *cpk_scan, double num_output_rows_after_filter, const RANGE_OPT_PARAM *param, OverflowBitset needed_fields, bool *found_imerge) |
void | ProposeAllSkipScans (int node_idx, double num_output_rows_after_filter, RANGE_OPT_PARAM *param, SEL_TREE *tree, Mem_root_array< PossibleIndexSkipScan > *index_skip_scans, MutableOverflowBitset *all_predicates, bool *found_skip_scan) |
void | ProposeIndexSkipScan (int node_idx, RANGE_OPT_PARAM *param, AccessPath *skip_scan_path, TABLE *table, OverflowBitset all_predicates, size_t num_where_predicates, size_t predicate_idx, double num_output_rows_after_filter, bool inexact) |
void | TraceAccessPaths (NodeMap nodes) |
void | ProposeAccessPathForBaseTable (int node_idx, double force_num_output_rows_after_filter, const char *description_for_trace, AccessPath *path) |
void | ProposeAccessPathForIndex (int node_idx, OverflowBitset applied_predicates, OverflowBitset subsumed_predicates, double force_num_output_rows_after_filter, const char *description_for_trace, AccessPath *path) |
void | ProposeAccessPathWithOrderings (NodeMap nodes, FunctionalDependencySet fd_set, OrderingSet obsolete_orderings, AccessPath *path, const char *description_for_trace) |
AccessPath * | MakeMaterializePath (const AccessPath &path, TABLE *table) const |
Make a path that materializes 'table'. More... | |
bool | ProposeTableScan (TABLE *table, int node_idx, double force_num_output_rows_after_filter) |
bool | ProposeIndexScan (TABLE *table, int node_idx, double force_num_output_rows_after_filter, unsigned key_idx, bool reverse, int ordering_idx) |
FindRangeScansResult | FindRangeScans (int node_idx, Table_ref *table_ref) |
Propose possible range scan access paths for a single node. More... | |
std::optional< ProposeRefsResult > | ProposeRefs (const ActiveIndexInfo &order_info, int node_idx, double row_estimate) |
Propose REF access paths for a single node and particular index. More... | |
bool | ProposeDistanceIndexScan (TABLE *table, int node_idx, double force_num_output_rows_after_filter, const SpatialDistanceScanInfo &order_info, int ordering_idx) |
bool | ProposeAllUniqueIndexLookupsWithConstantKey (int node_idx, bool *found) |
bool | RedundantThroughSargable (OverflowBitset redundant_against_sargable_predicates, const AccessPath *left_path, const AccessPath *right_path, NodeMap left, NodeMap right) |
pair< bool, bool > | AlreadyAppliedAsSargable (Item *condition, const AccessPath *left_path, const AccessPath *right_path) |
bool | ProposeAllFullTextIndexScans (TABLE *table, int node_idx, double force_num_output_rows_after_filter, bool *found_fulltext) |
bool | ProposeFullTextIndexScan (TABLE *table, int node_idx, Item_func_match *match, int predicate_idx, int ordering_idx, double force_num_output_rows_after_filter) |
void | ProposeNestedLoopJoin (NodeMap left, NodeMap right, AccessPath *left_path, AccessPath *right_path, const JoinPredicate *edge, bool rewrite_semi_to_inner, FunctionalDependencySet new_fd_set, OrderingSet new_obsolete_orderings, bool *wrote_trace) |
bool | AllowNestedLoopJoin (NodeMap left, NodeMap right, const AccessPath &left_path, const AccessPath &right_path, const JoinPredicate &edge) const |
void | ProposeHashJoin (NodeMap left, NodeMap right, AccessPath *left_path, AccessPath *right_path, const JoinPredicate *edge, FunctionalDependencySet new_fd_set, OrderingSet new_obsolete_orderings, bool rewrite_semi_to_inner, bool *wrote_trace) |
bool | AllowHashJoin (NodeMap left, NodeMap right, const AccessPath &left_path, const AccessPath &right_path, const JoinPredicate &edge) const |
void | ApplyPredicatesForBaseTable (int node_idx, OverflowBitset applied_predicates, OverflowBitset subsumed_predicates, bool materialize_subqueries, double force_num_output_rows_after_filter, AccessPath *path, FunctionalDependencySet *new_fd_set) |
void | ApplyDelayedPredicatesAfterJoin (NodeMap left, NodeMap right, const AccessPath *left_path, const AccessPath *right_path, int join_predicate_first, int join_predicate_last, bool materialize_subqueries, AccessPath *join_path, FunctionalDependencySet *new_fd_set) |
double | FindAlreadyAppliedSelectivity (const JoinPredicate *edge, const AccessPath *left_path, const AccessPath *right_path, NodeMap left, NodeMap right) |
void | CommitBitsetsToHeap (AccessPath *path) const |
bool | BitsetsAreCommitted (AccessPath *path) const |
Private Attributes | |
THD * | m_thd |
Query_block * | m_query_block |
The query block we are planning. More... | |
mem_root_unordered_map< NodeMap, AccessPathSet > | m_access_paths |
For each subset of tables that are connected in the join hypergraph, keeps the current best access paths for producing said subset. More... | |
int | m_num_seen_subgraph_pairs = 0 |
How many subgraph pairs we've seen so far. More... | |
JoinHypergraph * | m_graph |
The graph we are running over. More... | |
bool | has_clamped_multipart_eq_ref = false |
Whether we have applied clamping due to a multi-column EQ_REF at any point. More... | |
bool | has_semijoin_with_possibly_clamped_child = false |
Whether we have a semijoin where the inner child is parameterized on the outer child, and the row estimate of the inner child is possibly clamped, for example because of some other semijoin. More... | |
const LogicalOrderings * | m_orderings |
Keeps track of interesting orderings in this query block. More... | |
const Mem_root_array< SortAheadOrdering > * | m_sort_ahead_orderings |
List of all orderings that are candidates for sort-ahead (because they are, or may eventually become, an interesting ordering). More... | |
const Mem_root_array< ActiveIndexInfo > * | m_active_indexes |
List of all indexes that are active and that we can apply in this query. More... | |
const Mem_root_array< SpatialDistanceScanInfo > * | m_spatial_indexes |
List of all active spatial indexes that we can apply in this query. More... | |
const Mem_root_array< FullTextIndexInfo > * | m_fulltext_searches |
List of all active full-text indexes that we can apply in this query. More... | |
NodeMap | m_fulltext_tables = 0 |
A map of tables that are referenced by a MATCH function (those tables that have Table_ref::is_fulltext_searched() == true). More... | |
uint64_t | m_sargable_fulltext_predicates = 0 |
The set of WHERE predicates which are on a form that can be satisfied by a full-text index scan. More... | |
NodeMap | m_update_delete_target_nodes = 0 |
The target tables of an UPDATE or DELETE statement. More... | |
NodeMap | m_immediate_update_delete_candidates = 0 |
The set of tables that are candidates for immediate update or delete. More... | |
bool | m_need_rowid |
Whether we will be needing row IDs from our tables, typically for a later sort. More... | |
SecondaryEngineFlags | m_engine_flags |
The flags declared by the secondary engine. More... | |
int | m_subgraph_pair_limit |
The maximum number of pairs of subgraphs we are willing to accept, or -1 if no limit. More... | |
secondary_engine_modify_access_path_cost_t | m_secondary_engine_cost_hook |
Pointer to a function that modifies the cost estimates of an access path for execution in a secondary storage engine, or nullptr otherwise. More... | |
secondary_engine_check_optimizer_request_t | m_secondary_engine_planning_complexity_check |
Pointer to a function that returns what state should hypergraph progress for optimization with secondary storage engine, or nullptr otherwise. More... | |
NodeMap | forced_leftmost_table = 0 |
A map of tables that can never be on the right side of any join, ie., they have to be leftmost in the tree. More... | |
MEM_ROOT | m_overflow_bitset_mem_root |
A special MEM_ROOT for allocating OverflowBitsets that we might end up discarding, ie. More... | |
MEM_ROOT | m_range_optimizer_mem_root |
A special MEM_ROOT for temporary data for the range optimizer. More... | |
Friends | |
class | RefAccessBuilder |
CostingReceiver contains the main join planning logic, selecting access paths based on cost.
It receives subplans from DPhyp (see enumerate_subgraph.h), assigns them costs based on a cost model, and keeps the ones that are cheapest. In the end, this means it will be left with a root access path that gives the lowest total cost for joining the tables in the query block, ie., without ORDER BY etc.
Currently, besides the expected number of produced rows (which is the same no matter how we access the table) we keep only a single value per subplan (total cost), and thus also only a single best access path. In the future, we will have more dimensions to worry about, such as initial cost versus total cost (relevant for LIMIT), ordering properties, and so on. At that point, there is not necessarily a single “best” access path anymore, and we will need to keep multiple ones around, and test all of them as candidates when building larger subplans.
|
inline |
|
delete |
|
default |
|
inline |
|
private |
|
private |
|
inlineprivate |
|
inline |
True if the result of the join is found to be always empty, typically because of an impossible WHERE clause.
|
private |
|
private |
|
private |
|
private |
bool anonymous_namespace{join_optimizer.cc}::CostingReceiver::evaluate_secondary_engine_optimizer_state_request | ( | ) |
|
private |
|
private |
|
private |
Propose possible range scan access paths for a single node.
node_idx | The hypergraph node for which we need paths. |
table_ref | The table accessed by that node. |
bool anonymous_namespace{join_optimizer.cc}::CostingReceiver::FoundSingleNode | ( | int | node_idx | ) |
bool anonymous_namespace{join_optimizer.cc}::CostingReceiver::FoundSubgraphPair | ( | NodeMap | left, |
NodeMap | right, | ||
int | edge_idx | ||
) |
|
inline |
|
inline |
|
private |
Make a path that materializes 'table'.
path | The table access path for the materialized table. |
table | The table to materialize. |
|
inline |
|
inline |
|
delete |
|
default |
|
inlineprivate |
For trace use only.
|
private |
For trace use only.
AccessPath * anonymous_namespace{join_optimizer.cc}::CostingReceiver::ProposeAccessPath | ( | AccessPath * | path, |
AccessPathArray * | existing_paths, | ||
OrderingSet | obsolete_orderings, | ||
const char * | description_for_trace | ||
) | const |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
Propose REF access paths for a single node and particular index.
order_info | The index. |
node_idx | The hypergraph node. |
row_estimate | The estimated number of result rows. |
|
private |
|
private |
|
private |
|
private |
|
inline |
|
private |
|
inline |
|
inlineprivate |
Checks whether the given engine flag is active or not.
|
private |
|
friend |
|
private |
A map of tables that can never be on the right side of any join, ie., they have to be leftmost in the tree.
This only affects recursive table references (ie., when WITH RECURSIVE is in use); they work by continuously tailing new records, which wouldn't work if we were to scan them multiple times or put them in a hash table. Naturally, there must be zero or one bit here; the common case is zero.
|
private |
Whether we have applied clamping due to a multi-column EQ_REF at any point.
There is a known issue (see bug #33550360) where this can cause row count estimates to be inconsistent between different access paths. Obviously, we should fix this bug by adjusting the selectivities (and we do for single-column indexes), but for multipart indexes, this is nontrivial. See the bug for details on some ideas, but the gist of it is that we probably will want a linear program to adjust multi-selectivities so that they are consistent, and not above 1/N (for N-row tables) if there are unique indexes on them.
The only reason why we collect this information, like JoinHypergraph::has_reordered_left_joins, is to be able to assert on inconsistent row counts between APs, excluding this (known) issue.
|
private |
Whether we have a semijoin where the inner child is parameterized on the outer child, and the row estimate of the inner child is possibly clamped, for example because of some other semijoin.
In this case, we may see inconsistent row count estimates between the ordinary semijoin plan and the rewrite_semi_to_inner plan, because it's hard to tell how much the already-applied-as-sargable selectivity affected the row count estimate of the child.
The only reason why we collect this information, is to be able to assert on inconsistent row counts between access paths, excluding this known issue.
|
private |
For each subset of tables that are connected in the join hypergraph, keeps the current best access paths for producing said subset.
There can be several that are best in different ways; see comments on ProposeAccessPath().
Also used for communicating connectivity information back to DPhyp (in HasSeen()); if there's an entry here, that subset will induce a connected subgraph of the join hypergraph.
|
private |
List of all indexes that are active and that we can apply in this query.
Indexes can be useful in several ways: We can use them for ref access, for index-only scans, or to get interesting orderings.
|
private |
The flags declared by the secondary engine.
In particular, it describes what kind of access path types should not be created.
|
private |
List of all active full-text indexes that we can apply in this query.
|
private |
A map of tables that are referenced by a MATCH function (those tables that have Table_ref::is_fulltext_searched() == true).
It is used for preventing hash joins involving tables that are full-text searched.
|
private |
The graph we are running over.
|
private |
The set of tables that are candidates for immediate update or delete.
Immediate update/delete means that the rows from the table are deleted while reading the rows from the topmost iterator. (As opposed to buffered update/delete, where the row IDs are stored in temporary tables, and only updated or deleted after the topmost iterator has been read to the end.) The candidates are those target tables that are only referenced once in the query. The requirement for immediate deletion is that the deleted row will not have to be read again later. Currently, at most one of the candidate tables is chosen, and it is always the outermost table in the join tree.
|
private |
Whether we will be needing row IDs from our tables, typically for a later sort.
If this happens, derived tables cannot use streaming, but need an actual materialization, since filesort expects to be able to go back and ask for a given row. (This is different from when we need row IDs for weedout, which doesn't preclude streaming. The hypergraph optimizer does not use weedout.)
|
private |
How many subgraph pairs we've seen so far.
Used to give up if we end up allocating too many resources (prompting us to create a simpler join graph and try again).
|
private |
Keeps track of interesting orderings in this query block.
See LogicalOrderings for more information.
|
private |
A special MEM_ROOT for allocating OverflowBitsets that we might end up discarding, ie.
for AccessPaths that do not yet live in m_access_paths. For any AccessPath that is to have a permanent life (ie., not be immediately discarded as inferior), the OverflowBitset must be taken out of this MEM_ROOT and onto the regular one, as it is cleared often. (This significantly reduces the amount of memory used in situations where lots of AccessPaths are produced and discarded. Of course, it only matters for queries with >= 64 predicates.)
The copying is using CommitBitsetsToHeap(). ProposeAccessPath() will automatically call CommitBitsetsToHeap() for accepted access paths, but it will not call it on any of their children. Thus, if you've added more than one AccessPath in the chain (e.g. if you add a join, then a sort of that join, and then propose the sort), you will need to make sure there are no stray bitsets left on this MEM_ROOT.
Because this can be a source of subtle bugs, you should be conservative about what bitsets you put here; really, only the ones you could be allocating many of (like joins) should be candidates.
|
private |
The query block we are planning.
|
private |
A special MEM_ROOT for temporary data for the range optimizer.
It can be discarded immediately after we've decided on the range scans for a given table (but we reuse its memory as long as there are more tables left to scan).
|
private |
The set of WHERE predicates which are on a form that can be satisfied by a full-text index scan.
This includes calls to MATCH with no comparison operator, and predicates on the form MATCH > const or MATCH >= const (where const must be high enough to make the comparison return false for documents with zero score).
|
private |
Pointer to a function that modifies the cost estimates of an access path for execution in a secondary storage engine, or nullptr otherwise.
|
private |
Pointer to a function that returns what state should hypergraph progress for optimization with secondary storage engine, or nullptr otherwise.
|
private |
List of all orderings that are candidates for sort-ahead (because they are, or may eventually become, an interesting ordering).
|
private |
List of all active spatial indexes that we can apply in this query.
|
private |
The maximum number of pairs of subgraphs we are willing to accept, or -1 if no limit.
If this limit gets hit, we stop traversing the graph and return an error; the caller will then have to modify the hypergraph (see GraphSimplifier) to make for a graph with fewer options, so that planning time will come under an acceptable limit.
|
private |
|
private |
The target tables of an UPDATE or DELETE statement.