MySQL 9.1.0
Source Code Documentation
Access paths are a query planning structure that correspond 1:1 to iterators, in that an access path contains pretty much exactly the information needed to instantiate the given iterator, plus some information that is only needed during planning, such as costs. More...
#include <access_path.h>
Public Types | |
enum | Type : uint8_t { TABLE_SCAN , SAMPLE_SCAN , INDEX_SCAN , INDEX_DISTANCE_SCAN , REF , REF_OR_NULL , EQ_REF , PUSHED_JOIN_REF , FULL_TEXT_SEARCH , CONST_TABLE , MRR , FOLLOW_TAIL , INDEX_RANGE_SCAN , INDEX_MERGE , ROWID_INTERSECTION , ROWID_UNION , INDEX_SKIP_SCAN , GROUP_INDEX_SKIP_SCAN , DYNAMIC_INDEX_RANGE_SCAN , TABLE_VALUE_CONSTRUCTOR , FAKE_SINGLE_ROW , ZERO_ROWS , ZERO_ROWS_AGGREGATED , MATERIALIZED_TABLE_FUNCTION , UNQUALIFIED_COUNT , NESTED_LOOP_JOIN , NESTED_LOOP_SEMIJOIN_WITH_DUPLICATE_REMOVAL , BKA_JOIN , HASH_JOIN , FILTER , SORT , AGGREGATE , TEMPTABLE_AGGREGATE , LIMIT_OFFSET , STREAM , MATERIALIZE , MATERIALIZE_INFORMATION_SCHEMA_TABLE , APPEND , WINDOW , WEEDOUT , REMOVE_DUPLICATES , REMOVE_DUPLICATES_ON_INDEX , ALTERNATIVE , CACHE_INVALIDATOR , DELETE_ROWS , UPDATE_ROWS } |
enum | Safety : uint8_t { SAFE = 0 , SAFE_IF_SCANNED_ONCE = 1 , UNSAFE = 2 } |
A general enum to describe the safety of a given operation. More... | |
Public Member Functions | |
double | cost () const |
double | init_cost () const |
double | first_row_cost () const |
The cost of reading the first row. More... | |
double | init_once_cost () const |
double | cost_before_filter () const |
void | set_cost (double val) |
void | set_init_cost (double val) |
void | set_init_once_cost (double val) |
void | set_cost_before_filter (double val) |
double | rescan_cost () const |
Return the cost of scanning the given path for the second time (or later) in the given query block. More... | |
OverflowBitset & | applied_sargable_join_predicates () |
Bitmap of sargable join predicates that have already been applied in this access path by means of an index lookup (ref access), again referring to “predicates”, and thus should not be counted again for selectivity. More... | |
const OverflowBitset & | applied_sargable_join_predicates () const |
OverflowBitset & | subsumed_sargable_join_predicates () |
Similar to applied_sargable_join_predicates, bitmap of sargable join predicates that have been applied and will subsume the join predicate entirely, ie., not only should the selectivity not be double-counted, but the predicate itself is redundant and need not be applied as a filter. More... | |
const OverflowBitset & | subsumed_sargable_join_predicates () const |
auto & | table_scan () |
const auto & | table_scan () const |
auto & | sample_scan () |
const auto & | sample_scan () const |
auto & | index_scan () |
const auto & | index_scan () const |
auto & | index_distance_scan () |
const auto & | index_distance_scan () const |
auto & | ref () |
const auto & | ref () const |
auto & | ref_or_null () |
const auto & | ref_or_null () const |
auto & | eq_ref () |
const auto & | eq_ref () const |
auto & | pushed_join_ref () |
const auto & | pushed_join_ref () const |
auto & | full_text_search () |
const auto & | full_text_search () const |
auto & | const_table () |
const auto & | const_table () const |
auto & | mrr () |
const auto & | mrr () const |
auto & | follow_tail () |
const auto & | follow_tail () const |
auto & | index_range_scan () |
const auto & | index_range_scan () const |
auto & | index_merge () |
const auto & | index_merge () const |
auto & | rowid_intersection () |
const auto & | rowid_intersection () const |
auto & | rowid_union () |
const auto & | rowid_union () const |
auto & | index_skip_scan () |
const auto & | index_skip_scan () const |
auto & | group_index_skip_scan () |
const auto & | group_index_skip_scan () const |
auto & | dynamic_index_range_scan () |
const auto & | dynamic_index_range_scan () const |
auto & | materialized_table_function () |
const auto & | materialized_table_function () const |
auto & | unqualified_count () |
const auto & | unqualified_count () const |
auto & | table_value_constructor () |
const auto & | table_value_constructor () const |
auto & | fake_single_row () |
const auto & | fake_single_row () const |
auto & | zero_rows () |
const auto & | zero_rows () const |
auto & | zero_rows_aggregated () |
const auto & | zero_rows_aggregated () const |
auto & | hash_join () |
const auto & | hash_join () const |
auto & | bka_join () |
const auto & | bka_join () const |
auto & | nested_loop_join () |
const auto & | nested_loop_join () const |
auto & | nested_loop_semijoin_with_duplicate_removal () |
const auto & | nested_loop_semijoin_with_duplicate_removal () const |
auto & | filter () |
const auto & | filter () const |
auto & | sort () |
const auto & | sort () const |
auto & | aggregate () |
const auto & | aggregate () const |
auto & | temptable_aggregate () |
const auto & | temptable_aggregate () const |
auto & | limit_offset () |
const auto & | limit_offset () const |
auto & | stream () |
const auto & | stream () const |
auto & | materialize () |
const auto & | materialize () const |
auto & | materialize_information_schema_table () |
const auto & | materialize_information_schema_table () const |
auto & | append () |
const auto & | append () const |
auto & | window () |
const auto & | window () const |
auto & | weedout () |
const auto & | weedout () const |
auto & | remove_duplicates () |
const auto & | remove_duplicates () const |
auto & | remove_duplicates_on_index () |
const auto & | remove_duplicates_on_index () const |
auto & | alternative () |
const auto & | alternative () const |
auto & | cache_invalidator () |
const auto & | cache_invalidator () const |
auto & | delete_rows () |
const auto & | delete_rows () const |
auto & | update_rows () |
const auto & | update_rows () const |
double | num_output_rows () const |
void | set_num_output_rows (double val) |
Access paths are a query planning structure that correspond 1:1 to iterators, in that an access path contains pretty much exactly the information needed to instantiate the given iterator, plus some information that is only needed during planning, such as costs.
(The new join optimizer will extend this somewhat in the future. Some iterators also need the query block, ie., the JOIN object they are part of, but that is implicitly available when constructing the tree.)
AccessPath objects build on a variant, ie., they can hold an access path of any type (table scan, filter, hash join, sort, etc.), although only one at a time. Currently, they contain 32 bytes of base information that is common to any access path (type identifier, costs, etc.), and then up to 40 bytes that are type-specific (e.g. for a table scan, the TABLE object). It would be nice if we could squeeze it down to 64 and fit a cache line exactly, but it does not seem to be easy without fairly large contortions.
We could have solved this by inheritance, but the fixed-size design makes it possible to replace an access path when a better one is found, without introducing a new allocation, which will be important when using them as a planning structure.
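A minimal sketch of how this variant-like layout is typically consumed when walking a plan tree. It dispatches on the public type tag and uses the accessors listed above to reach the type-specific payload; the payload member names (table, child) are taken to match access_path.h, so treat the snippet as illustrative rather than a verbatim excerpt:

#include <cstdio>
#include <access_path.h>

// Illustrative only: dispatch on the type tag, then use the matching
// accessor to reach the type-specific payload stored in the union.
static void DescribePath(const AccessPath *path) {
  switch (path->type) {
    case AccessPath::TABLE_SCAN:
      // For a table scan, the payload holds the TABLE to be read.
      std::printf("table scan on %s\n", path->table_scan().table->alias);
      break;
    case AccessPath::FILTER:
      // A filter wraps a child path plus the condition to evaluate.
      DescribePath(path->filter().child);
      break;
    default:
      break;
  }
}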
enum AccessPath::Safety : uint8_t |
A general enum to describe the safety of a given operation.
Currently we only use this to describe row IDs, but it can easily be reused for safety of updating a table we're reading from (the Halloween problem), or just generally unreproducible results (e.g. a TABLESAMPLE changing due to external factors).
Less safe values have higher numerical values.
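Because less safe values are numerically higher, the combined safety of an operation built from several parts can be taken as the maximum over the parts; a minimal sketch (the CombineSafety helper is hypothetical, not a member of the class):

#include <algorithm>

// Hypothetical helper: the weakest guarantee (highest value) wins.
// SAFE (0) < SAFE_IF_SCANNED_ONCE (1) < UNSAFE (2).
inline AccessPath::Safety CombineSafety(AccessPath::Safety a,
                                        AccessPath::Safety b) {
  return std::max(a, b);
}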
enum AccessPath::Type : uint8_t |
OverflowBitset & AccessPath::applied_sargable_join_predicates ( ) [inline] |
Bitmap of sargable join predicates that have already been applied in this access path by means of an index lookup (ref access), again referring to “predicates”, and thus should not be counted again for selectivity.
Note that the filter may need to be applied nevertheless (especially in case of type conversions); see subsumed_sargable_join_predicates.
Since these refer to the same array as filter_predicates, they will never overlap with filter_predicates, and so we can reuse the same memory using an alias (a union would not be allowed, since OverflowBitset is a class with non-trivial default constructor), even though the meaning is entirely separate. If N = num_where_predicates in the hypergraph, then bits 0..(N-1) belong to filter_predicates, and the rest to applied_sargable_join_predicates.
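A small sketch of that indexing convention, assuming num_where_predicates is available from the planner context (the helper itself is hypothetical):

#include <cstddef>

// Hypothetical helper: bits [0, N) refer to WHERE filter predicates,
// bits [N, ...) refer to sargable join predicates applied via ref access.
inline bool IsFilterPredicateBit(size_t bit, size_t num_where_predicates) {
  return bit < num_where_predicates;
}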
double AccessPath::first_row_cost ( ) const [inline] |
The cost of reading the first row.
double AccessPath::rescan_cost ( ) const [inline] |
Return the cost of scanning the given path for the second time (or later) in the given query block.
This is really the interesting metric, not init_once_cost in itself, but since nearly all paths have zero init_once_cost, storing that instead allows us to skip a lot of repeated path->init_once_cost = path->init_cost calls in the code.
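Put differently, a rescan pays the full cost except for the once-per-query-block part of initialization; a minimal sketch of that relationship, assuming it holds exactly as described (this is not claimed to be the class's actual implementation):

// Illustrative: second and later scans do not pay init_once_cost again.
inline double RescanCostOf(const AccessPath &path) {
  return path.cost() - path.init_once_cost();
}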
OverflowBitset & AccessPath::subsumed_sargable_join_predicates ( ) [inline] |
Similar to applied_sargable_join_predicates, bitmap of sargable join predicates that have been applied and will subsume the join predicate entirely, ie., not only should the selectivity not be double-counted, but the predicate itself is redundant and need not be applied as a filter.
(It is an error to have a bit set here but not in applied_sargable_join_predicates.)
struct { ... } AccessPath::aggregate |
bool AccessPath::allow_clustered_primary_key_scan |
bool AccessPath::allow_spill_to_disk |
bool AccessPath::already_expanded_predicates |
struct { ... } AccessPath::alternative |
struct { ... } AccessPath::append |
struct { ... } AccessPath::bka_join |
AccessPath* AccessPath::bka_path |
struct { ... } AccessPath::cache_invalidator |
bool AccessPath::can_be_used_for_imerge |
bool AccessPath::can_be_used_for_ror |
const char* AccessPath::cause |
AccessPath* AccessPath::child |
Mem_root_array<AccessPath *>* AccessPath::children |
Mem_root_array<AppendPathParameters>* AccessPath::children |
Item* AccessPath::condition |
struct { ... } AccessPath::const_table |
bool AccessPath::count_all_rows |
bool AccessPath::count_examined_rows |
Whether this access path counts as one that scans a base table, and thus should be counted towards examined_rows.
It can sometimes seem a bit arbitrary which iterators count towards examined_rows and which ones do not, so the only canonical reference is the tests.
AccessPath* AccessPath::cpk_child |
OverflowBitset AccessPath::delayed_predicates {0} |
Bitmap of WHERE predicates that touch tables we have joined in, but that we could not apply yet (for instance because they reference other tables, or because we could not push them down into the nullable side of outer joins).
Used during planning only (see filter_predicates).
struct { ... } AccessPath::delete_rows |
struct { ... } AccessPath::dynamic_index_range_scan |
struct { ... } AccessPath::eq_ref |
OverflowBitset AccessPath::equijoin_predicates |
struct { ... } AccessPath::fake_single_row |
Filesort* AccessPath::filesort |
struct { ... } AccessPath::filter |
OverflowBitset AccessPath::filter_predicates {0} |
Bitmap of WHERE predicates that we are including on this access path, referring to the “predicates” array internal to the join optimizer.
Since bit masks are much cheaper to deal with than creating Item objects, and we don't invent new conditions during join optimization (all of them are known when we begin optimization), we stick to manipulating bit masks during optimization, saying which filters will be applied at this node (a 1-bit means the filter will be applied here; if there are multiple ones, they are ANDed together).
This is used during join optimization only; before iterators are created, we will add FILTER access paths to represent these instead, removing the dependency on the array. Said FILTER paths are by convention created with materialize_subqueries = false, since by far the most common case is that there are no subqueries in the predicate. In other words, if you wish to represent a filter with materialize_subqueries = true, you will need to make an explicit FILTER node.
See also nested_loop_join().equijoin_predicates, which is for filters being applied before nested-loop joins, but is otherwise the same idea.
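Conceptually, when iterators are created each set bit is mapped back to its Item and the selected conditions are ANDed into a FILTER path above this node. A simplified sketch below uses a plain uint64_t in place of OverflowBitset, and the predicate array and helper name are hypothetical:

#include <cstdint>
#include <vector>

class Item;  // MySQL expression node

// Simplified sketch: collect the Items selected by a filter bitmap.
// "predicates" stands in for the join optimizer's internal predicate array;
// the returned conditions would be ANDed together in a FILTER path.
inline std::vector<Item *> CollectFilterItems(
    uint64_t filter_bits, const std::vector<Item *> &predicates) {
  std::vector<Item *> items;
  for (size_t i = 0; i < predicates.size(); ++i) {
    if (filter_bits & (uint64_t{1} << i)) items.push_back(predicates[i]);
  }
  return items;
}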
struct { ... } AccessPath::follow_tail |
bool AccessPath::force_sort_rowids |
bool AccessPath::forced_by_dbug |
Whether this access path is forced preferred over all others by means of a SET DEBUG force_subplan_0x... statement.
bool AccessPath::forced_by_hint |
Item_func_match* AccessPath::ft_func |
struct { ... } AccessPath::full_text_search |
bool AccessPath::geometry |
struct { ... } AccessPath::group_index_skip_scan |
Item** AccessPath::group_items |
int AccessPath::group_items_size |
bool AccessPath::has_group_skip_scan |
Whether this access path contains a GROUP_INDEX_SKIP_SCAN.
struct { ... } AccessPath::hash_join |
int AccessPath::idx |
table_map AccessPath::immediate_tables |
int8_t AccessPath::immediate_update_delete_table {-1} |
For UPDATE and DELETE statements: The node index of a table which can be updated or deleted from immediately as the rows are read from the iterator, if this path is only read from once.
-1 if there is no such table in this path.
Note that this is an index into CostingReceiver's array of nodes, and is not necessarily equal to the table number within the query block given by Table_ref::tableno().
The table, if any, is currently always the outermost table in the path.
It is possible to have plans where it would be safe to operate "immediately" on more than one table. For example, if we do a merge join, it is safe to perform immediate deletes on tables on the inner side of the join, since both sides are read only once. (However, we currently do not support merge joins.)
Another possibility is when the outer table of a nested loop join is guaranteed to return at most one row (typically, a unique index lookup aka. eq_ref). Then it's safe to delete immediately from both sides of the nested loop join. But we don't do this yet.
Hash joins read both sides exactly once. However, the scans on the inner tables are not positioned on the correct row when the result of the join is returned, so the immediate delete logic will need to be changed to reposition the underlying scans before doing the immediate deletes. While this can be done, it makes the benefit of immediate deletes less obvious for these tables, and it can also be a loss in some cases, because we lose the deduplication provided by the Unique object used for buffered deletes (the immediate deletes could end up spending time repositioning to already deleted rows). So we currently don't attempt to do immediate deletes from inner tables of hash joins either.
The outer table of a hash join can be deleted from immediately if the inner table fits in memory. If the hash join spills to disk, though, neither the rows of the outer table nor the rows of the inner table come out in the order of the underlying scan, so it is not safe in general to perform immediate deletes on the outer table of a hash join.
If support for immediate operations on multiple tables is added, this member could be changed from a node index to a NodeMap.
unsigned AccessPath::index |
struct { ... } AccessPath::index_distance_scan |
struct { ... } AccessPath::index_merge |
struct { ... } AccessPath::index_range_scan |
struct { ... } AccessPath::index_scan |
struct { ... } AccessPath::index_skip_scan |
AccessPath * AccessPath::inner |
bool AccessPath::is_covering |
bool AccessPath::is_unique |
RowIterator* AccessPath::iterator = nullptr |
If an iterator has been instantiated for this access path, points to the iterator.
Used for constructing iterators that need to talk to each other (e.g. for recursive CTEs, or BKA join), and also for locating timing information in EXPLAIN ANALYZE queries.
JOIN* AccessPath::join |
const JoinPredicate* AccessPath::join_predicate |
JoinType AccessPath::join_type |
bool AccessPath::keep_current_rowid |
KEY* AccessPath::key |
size_t AccessPath::key_len |
ha_rows AccessPath::limit |
struct { ... } AccessPath::limit_offset |
unsigned AccessPath::loosescan_key_len |
double AccessPath::m_cost [private] |
Expected cost to read all of this access path once.
double AccessPath::m_cost_before_filter [private] |
If no filter, identical to cost.
init_cost is always the same (filters have zero initialization cost).
double AccessPath::m_init_cost [private] |
Expected cost to initialize this access path; ie., cost to read k out of N rows would be init_cost + (k/N) * (cost - init_cost).
Note that EXPLAIN prints out cost of reading the first row because it is easier for the user and also easier to measure in EXPLAIN ANALYZE, but it is easier to do calculations with a pure initialization cost, so that is what we use in this member. kUnknownCost for unknown.
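For example, with init_cost = 10, cost = 110 and N = 100 output rows, reading the first row is expected to cost 10 + (1/100) * (110 - 10) = 11. A small sketch of the interpolation (the helper is hypothetical):

// Hypothetical helper: expected cost of reading the first k of n rows,
// using the linear interpolation described above.
inline double CostToReadKRows(double init_cost, double total_cost,
                              double k, double n) {
  return init_cost + (k / n) * (total_cost - init_cost);
}
// CostToReadKRows(10.0, 110.0, 1.0, 100.0) == 11.0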
double AccessPath::m_init_once_cost [private] |
Of init_cost, how much of the initialization needs only to be done once per query block.
(This is a cost, not a proportion.) Ie., if the access path can reuse some of its initialization work if Init() is called multiple times, this member will be nonzero. A typical example is a materialized table with rematerialize=false; the second time Init() is called, it's a no-op. Most paths will have init_once_cost = 0.0, ie., repeated scans will cost the same. We do not intend to use this field to model cache effects.
This is currently not printed in EXPLAIN, only optimizer trace.
double AccessPath::m_num_output_rows [private] |
Expected number of output rows.
struct { ... } AccessPath::materialize |
struct { ... } AccessPath::materialize_information_schema_table |
bool AccessPath::materialize_subqueries |
struct { ... } AccessPath::materialized_table_function |
struct { ... } AccessPath::mrr |
unsigned AccessPath::mrr_buf_size |
int AccessPath::mrr_flags |
unsigned AccessPath::mrr_flags |
unsigned AccessPath::mrr_length_per_rec |
const char* AccessPath::name |
bool AccessPath::need_rows_in_rowid_order |
bool AccessPath::needs_buffering |
struct { ... } AccessPath::nested_loop_join |
struct { ... } AccessPath::nested_loop_semijoin_with_duplicate_removal |
double AccessPath::num_output_rows_before_filter {kUnknownRowCount} |
If no filter, identical to num_output_rows.
unsigned AccessPath::num_ranges |
unsigned AccessPath::num_used_key_parts |
ha_rows AccessPath::offset |
olap_type AccessPath::olap |
ORDER* AccessPath::order |
int AccessPath::ordering_state = 0 |
Which ordering the rows produced by this path follow, if any (see interesting_orders.h).
This is really a LogicalOrderings::StateIndex, but we don't want to add a dependency on interesting_orders.h from this file, so we use the base type instead of the typedef here.
AccessPath* AccessPath::outer |
Mem_root_array<Item_values_column *>* AccessPath::output_refs |
IndexSkipScanParameters* AccessPath::param |
GroupIndexSkipScanParameters* AccessPath::param |
MaterializePathParameters* AccessPath::param |
hypergraph::NodeMap AccessPath::parameter_tables {0} |
If nonzero, a bitmap of other tables whose joined-in rows must already be loaded when rows from this access path are evaluated; that is, this access path must be put on the inner side of a nested-loop join (or multiple such joins) where the outer side includes all of the given tables.
The most obvious case for this is dependent tables in LATERAL, but a more common case is when we have pushed join conditions referring to those tables; e.g., if this access path represents t1 and we have a condition t1.x=t2.x that is pushed down into an index lookup (ref access), t2 will be set in this bitmap. We can still join in other tables, deferring t2, but the bit(s) will then propagate, and we cannot be on the right side of a hash join until parameter_tables is zero again. (Also see DisallowParameterizedJoinPath() for when we disallow such deferring, as an optimization.)
As a special case, we allow setting RAND_TABLE_BIT, even though it is normally part of a table_map, not a NodeMap. In this case, it specifies that the access path is entirely noncachable, because it depends on something nondeterministic or an outer reference, and thus can never be on the right side of a hash join, ever.
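A sketch of how this constraint might be checked during planning, assuming the simple rule that a path with any remaining parameter tables (including RAND_TABLE_BIT) cannot be the right-hand input of a hash join (the helper is hypothetical):

// Hypothetical check: only a fully resolved (unparameterized) path can be
// placed on the right side of a hash join.
inline bool CanBeHashJoinInner(const AccessPath &path) {
  return path.parameter_tables == 0;
}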
bool AccessPath::pfs_batch_mode |
bool AccessPath::provide_rowid |
struct { ... } AccessPath::pushed_join_ref |
QEP_TAB* AccessPath::qep_tab |
QUICK_RANGE* AccessPath::range |
QUICK_RANGE** AccessPath::ranges |
float AccessPath::rec_per_key |
Index_lookup* AccessPath::ref |
struct { ... } AccessPath::ref |
struct { ... } AccessPath::ref_or_null |
int AccessPath::ref_slice |
bool AccessPath::reject_multiple_rows |
bool AccessPath::remove_duplicates |
struct { ... } AccessPath::remove_duplicates |
struct { ... } AccessPath::remove_duplicates_on_index |
bool AccessPath::retrieve_full_rows |
bool AccessPath::reuse_handler |
bool AccessPath::reverse |
bool AccessPath::rewrite_semi_to_inner |
struct { ... } AccessPath::rowid_intersection |
struct { ... } AccessPath::rowid_union |
Safety AccessPath::safe_for_rowid |
Whether it is safe to get row IDs (for sorting) from this access path.
struct { ... } AccessPath::sample_scan |
double AccessPath::sampling_percentage |
enum tablesample_type AccessPath::sampling_type |
void* AccessPath::secondary_engine_data {nullptr} |
Auxiliary data used by a secondary storage engine while processing the access path during optimization and execution.
The secondary storage engine is free to store any useful information in this member, for example extra statistics or cost estimates. The data pointed to is fully owned by the secondary storage engine, and it is the responsibility of the secondary engine to manage the memory and make sure it is properly destroyed.
ha_rows* AccessPath::send_records_override |
struct { ... } AccessPath::sort |
bool AccessPath::store_rowids |
struct { ... } AccessPath::stream |
double AccessPath::subquery_cost |
The total cost of executing the queries that we materialize.
AccessPath* AccessPath::subquery_path |
double AccessPath::subquery_rows |
The number of materialized rows (as opposed to the number of rows fetched by table_path).
Needed for 'explain'.
TABLE* AccessPath::table |
const TABLE* AccessPath::table |
Table_function* AccessPath::table_function |
Table_ref* AccessPath::table_list |
AccessPath* AccessPath::table_path |
struct { ... } AccessPath::table_scan |
AccessPath* AccessPath::table_scan_path |
struct { ... } AccessPath::table_value_constructor |
table_map AccessPath::tables_to_delete_from |
table_map AccessPath::tables_to_get_rowid_for |
table_map AccessPath::tables_to_update |
TABLE* AccessPath::temp_table |
Temp_table_param* AccessPath::temp_table_param |
struct { ... } AccessPath::temptable_aggregate |
enum AccessPath::Type AccessPath::type |
union { ... } AccessPath::u |
struct { ... } AccessPath::unqualified_count |
bool AccessPath::unwrap_rollup |
struct { ... } AccessPath::update_rows |
bool AccessPath::use_limit |
bool AccessPath::use_order |
KEY_PART* AccessPath::used_key_part |
Index_lookup* AccessPath::used_ref |
bool AccessPath::using_extended_key_parts |
struct { ... } AccessPath::weedout |
SJ_TMP_TABLE* AccessPath::weedout_table |
Window* AccessPath::window |
struct { ... } AccessPath::window |
struct { ... } AccessPath::zero_rows |
struct { ... } AccessPath::zero_rows_aggregated |