CostingReceiver contains the main join planning logic, selecting access paths based on cost.
More...
|
| CostingReceiver (THD *thd, Query_block *query_block, JoinHypergraph &graph, const LogicalOrderings *orderings, const Mem_root_array< SortAheadOrdering > *sort_ahead_orderings, const Mem_root_array< ActiveIndexInfo > *active_indexes, const Mem_root_array< FullTextIndexInfo > *fulltext_searches, NodeMap fulltext_tables, uint64_t sargable_fulltext_predicates, table_map update_delete_target_tables, table_map immediate_update_delete_candidates, bool need_rowid, SecondaryEngineFlags engine_flags, int subgraph_pair_limit, secondary_engine_modify_access_path_cost_t secondary_engine_cost_hook, string *trace) |
|
CostingReceiver & | operator= (const CostingReceiver &)=delete |
|
CostingReceiver & | operator= (CostingReceiver &&)=default |
|
| CostingReceiver (const CostingReceiver &)=delete |
|
| CostingReceiver (CostingReceiver &&)=default |
|
bool | HasSeen (NodeMap subgraph) const |
|
bool | FoundSingleNode (int node_idx) |
|
bool | FoundSubgraphPair (NodeMap left, NodeMap right, int edge_idx) |
|
const Prealloced_array< AccessPath *, 4 > & | root_candidates () |
|
FunctionalDependencySet | active_fds_at_root () const |
|
size_t | num_subplans () const |
|
size_t | num_access_paths () const |
|
bool | always_empty () const |
| True if the result of the join is found to be always empty, typically because of an impossible WHERE clause. More...
|
|
AccessPath * | ProposeAccessPath (AccessPath *path, Prealloced_array< AccessPath *, 4 > *existing_paths, OrderingSet obsolete_orderings, const char *description_for_trace) const |
|
bool | HasSecondaryEngineCostHook () const |
|
|
string | PrintSet (NodeMap x) const |
| For trace use only. More...
|
|
string | PrintSubgraphHeader (const JoinPredicate *edge, const AccessPath &join_path, NodeMap left, NodeMap right) const |
| For trace use only. More...
|
|
bool | SupportedEngineFlag (SecondaryEngineFlag flag) const |
| Checks whether the given engine flag is active or not. More...
|
|
bool | FindIndexRangeScans (int node_idx, bool *impossible, double *num_output_rows_after_filter) |
|
void | ProposeIndexMerge (TABLE *table, int node_idx, const SEL_IMERGE &imerge, int pred_idx, bool inexact, bool allow_clustered_primary_key_scan, int num_where_predicates, double num_output_rows_after_filter, RANGE_OPT_PARAM *param, bool *has_clustered_primary_key_scan) |
|
void | TraceAccessPaths (NodeMap nodes) |
|
void | ProposeAccessPathForBaseTable (int node_idx, double force_num_output_rows_after_filter, const char *description_for_trace, AccessPath *path) |
|
void | ProposeAccessPathForIndex (int node_idx, OverflowBitset applied_predicates, OverflowBitset subsumed_predicates, double force_num_output_rows_after_filter, const char *description_for_trace, AccessPath *path) |
|
void | ProposeAccessPathWithOrderings (NodeMap nodes, FunctionalDependencySet fd_set, OrderingSet obsolete_orderings, AccessPath *path, const char *description_for_trace) |
|
bool | ProposeTableScan (TABLE *table, int node_idx, double force_num_output_rows_after_filter) |
|
bool | ProposeIndexScan (TABLE *table, int node_idx, double force_num_output_rows_after_filter, unsigned key_idx, bool reverse, int ordering_idx) |
|
bool | ProposeRefAccess (TABLE *table, int node_idx, unsigned key_idx, double force_num_output_rows_after_filter, bool reverse, table_map allowed_parameter_tables, int ordering_idx) |
|
bool | ProposeAllUniqueIndexLookupsWithConstantKey (int node_idx, bool *found) |
|
bool | RedundantThroughSargable (OverflowBitset redundant_against_sargable_predicates, const AccessPath *left_path, const AccessPath *right_path, NodeMap left, NodeMap right) |
|
pair< bool, bool > | AlreadyAppliedAsSargable (Item *condition, const AccessPath *left_path, const AccessPath *right_path) |
|
bool | ProposeAllFullTextIndexScans (TABLE *table, int node_idx, double force_num_output_rows_after_filter) |
|
bool | ProposeFullTextIndexScan (TABLE *table, int node_idx, Item_func_match *match, int predicate_idx, int ordering_idx, double force_num_output_rows_after_filter) |
|
void | ProposeNestedLoopJoin (NodeMap left, NodeMap right, AccessPath *left_path, AccessPath *right_path, const JoinPredicate *edge, bool rewrite_semi_to_inner, FunctionalDependencySet new_fd_set, OrderingSet new_obsolete_orderings, bool *wrote_trace) |
|
void | ProposeHashJoin (NodeMap left, NodeMap right, AccessPath *left_path, AccessPath *right_path, const JoinPredicate *edge, FunctionalDependencySet new_fd_set, OrderingSet new_obsolete_orderings, bool rewrite_semi_to_inner, bool *wrote_trace) |
|
void | ApplyPredicatesForBaseTable (int node_idx, OverflowBitset applied_predicates, OverflowBitset subsumed_predicates, bool materialize_subqueries, AccessPath *path, FunctionalDependencySet *new_fd_set) |
|
void | ApplyDelayedPredicatesAfterJoin (NodeMap left, NodeMap right, const AccessPath *left_path, const AccessPath *right_path, int join_predicate_first, int join_predicate_last, bool materialize_subqueries, AccessPath *join_path, FunctionalDependencySet *new_fd_set) |
|
double | FindAlreadyAppliedSelectivity (const JoinPredicate *edge, const AccessPath *left_path, const AccessPath *right_path, NodeMap left, NodeMap right) |
|
void | CommitBitsetsToHeap (AccessPath *path) const |
|
bool | BitsetsAreCommitted (AccessPath *path) const |
|
|
THD * | m_thd |
|
Query_block * | m_query_block |
| The query block we are planning. More...
|
|
mem_root_unordered_map< NodeMap, AccessPathSet > | m_access_paths |
| For each subset of tables that are connected in the join hypergraph, keeps the current best access paths for producing said subset. More...
|
|
int | m_num_seen_subgraph_pairs = 0 |
| How many subgraph pairs we've seen so far. More...
|
|
JoinHypergraph * | m_graph |
| The graph we are running over. More...
|
|
bool | has_clamped_multipart_eq_ref = false |
| Whether we have applied clamping due to a multi-column EQ_REF at any point. More...
|
|
bool | has_semijoin_with_possibly_clamped_child = false |
| Whether we have a semijoin where the inner child is parameterized on the outer child, and the row estimate of the inner child is possibly clamped, for example because of some other semijoin. More...
|
|
const LogicalOrderings * | m_orderings |
| Keeps track of interesting orderings in this query block. More...
|
|
const Mem_root_array< SortAheadOrdering > * | m_sort_ahead_orderings |
| List of all orderings that are candidates for sort-ahead (because they are, or may eventually become, an interesting ordering). More...
|
|
const Mem_root_array< ActiveIndexInfo > * | m_active_indexes |
| List of all indexes that are active and that we can apply in this query. More...
|
|
const Mem_root_array< FullTextIndexInfo > * | m_fulltext_searches |
| List of all active full-text indexes that we can apply in this query. More...
|
|
NodeMap | m_fulltext_tables = 0 |
| A map of tables that are referenced by a MATCH function (those tables that have Table_ref::is_fulltext_searched() == true). More...
|
|
uint64_t | m_sargable_fulltext_predicates = 0 |
| The set of WHERE predicates which are on a form that can be satisfied by a full-text index scan. More...
|
|
NodeMap | m_update_delete_target_nodes = 0 |
| The target tables of an UPDATE or DELETE statement. More...
|
|
NodeMap | m_immediate_update_delete_candidates = 0 |
| The set of tables that are candidates for immediate update or delete. More...
|
|
bool | m_need_rowid |
| Whether we will be needing row IDs from our tables, typically for a later sort. More...
|
|
SecondaryEngineFlags | m_engine_flags |
| The flags declared by the secondary engine. More...
|
|
int | m_subgraph_pair_limit |
| The maximum number of pairs of subgraphs we are willing to accept, or -1 if no limit. More...
|
|
secondary_engine_modify_access_path_cost_t | m_secondary_engine_cost_hook |
| Pointer to a function that modifies the cost estimates of an access path for execution in a secondary storage engine, or nullptr otherwise. More...
|
|
string * | m_trace |
| If not nullptr, we store human-readable optimizer trace information here. More...
|
|
NodeMap | forced_leftmost_table = 0 |
| A map of tables that can never be on the right side of any join, i.e., they have to be leftmost in the tree. More...
|
|
MEM_ROOT | m_overflow_bitset_mem_root |
| A special MEM_ROOT for allocating OverflowBitsets that we might end up discarding, i.e. More...
|
|
MEM_ROOT | m_range_optimizer_mem_root |
| A special MEM_ROOT for temporary data for the range optimizer. More...
|
|
CostingReceiver contains the main join planning logic, selecting access paths based on cost.
It receives subplans from DPhyp (see enumerate_subgraph.h), assigns them costs based on a cost model, and keeps the ones that are cheapest. In the end, this means it will be left with a root access path that gives the lowest total cost for joining the tables in the query block, i.e., without ORDER BY etc.
Currently, besides the expected number of produced rows (which is the same no matter how we access the table) we keep only a single value per subplan (total cost), and thus also only a single best access path. In the future, we will have more dimensions to worry about, such as initial cost versus total cost (relevant for LIMIT), ordering properties, and so on. At that point, there is not necessarily a single “best” access path anymore, and we will need to keep multiple ones around, and test all of them as candidates when building larger subplans.
bool anonymous_namespace{join_optimizer.cc}::CostingReceiver::has_clamped_multipart_eq_ref = false |
|
private |
Whether we have applied clamping due to a multi-column EQ_REF at any point.
There is a known issue (see bug #33550360) where this can cause row count estimates to be inconsistent between different access paths. Obviously, we should fix this bug by adjusting the selectivities (and we do for single-column indexes), but for multipart indexes, this is nontrivial. See the bug for details on some ideas, but the gist of it is that we probably will want a linear program to adjust multi-selectivities so that they are consistent, and not above 1/N (for N-row tables) if there are unique indexes on them.
The only reason why we collect this information, like JoinHypergraph::has_reordered_left_joins, is to be able to assert on inconsistent row counts between APs, excluding this (known) issue.
bool anonymous_namespace{join_optimizer.cc}::CostingReceiver::has_semijoin_with_possibly_clamped_child = false |
|
private |
Whether we have a semijoin where the inner child is parameterized on the outer child, and the row estimate of the inner child is possibly clamped, for example because of some other semijoin.
In this case, we may see inconsistent row count estimates between the ordinary semijoin plan and the rewrite_semi_to_inner plan, because it's hard to tell how much the already-applied-as-sargable selectivity affected the row count estimate of the child.
The only reason why we collect this information is to be able to assert on inconsistent row counts between access paths, excluding this known issue.
MEM_ROOT anonymous_namespace{join_optimizer.cc}::CostingReceiver::m_overflow_bitset_mem_root |
|
private |
A special MEM_ROOT for allocating OverflowBitsets that we might end up discarding, i.e.
for AccessPaths that do not yet live in m_access_paths. For any AccessPath that is to have a permanent life (i.e., not be immediately discarded as inferior), the OverflowBitset must be taken out of this MEM_ROOT and onto the regular one, as it is cleared often. (This significantly reduces the amount of memory used in situations where lots of AccessPaths are produced and discarded. Of course, it only matters for queries with >= 64 predicates.)
The copying is done using CommitBitsetsToHeap(). ProposeAccessPath() will automatically call CommitBitsetsToHeap() for accepted access paths, but it will not call it on any of their children. Thus, if you've added more than one AccessPath in the chain (e.g. if you add a join, then a sort of that join, and then propose the sort), you will need to make sure there are no stray bitsets left on this MEM_ROOT.
Because this can be a source of subtle bugs, you should be conservative about what bitsets you put here; really, only the ones you could be allocating many of (like joins) should be candidates.