MySQL 8.0.30
Source Code Documentation
LogicalOrderings Class Reference

#include <interesting_orders.h>

Classes

struct  DFSMEdge
 
struct  DFSMState
 
struct  ItemInfo
 
struct  NFSMEdge
 
struct  NFSMState
 
struct  OrderingWithInfo
 

Public Types

using StateIndex = int
 

Public Member Functions

 LogicalOrderings (THD *thd)
 
ItemHandle GetHandle (Item *item)
 
Itemitem (ItemHandle item) const
 
int AddOrdering (THD *thd, Ordering order, bool interesting, bool used_at_end, table_map homogenize_tables)
 
int num_orderings () const
 
Ordering ordering (int ordering_idx) const
 
bool ordering_is_relevant_for_sortahead (int ordering_idx) const
 
int AddFunctionalDependency (THD *thd, FunctionalDependency fd)
 
int num_fds () const
 
void SetHeadForAggregates (Bounds_checked_array< ItemHandle > head)
 
void SetRollup (bool rollup)
 
void Build (THD *thd, std::string *trace)
 
int RemapOrderingIndex (int ordering_idx) const
 
StateIndex SetOrder (int ordering_idx) const
 
FunctionalDependencySet GetFDSet (int fd_idx) const
 
StateIndex ApplyFDs (StateIndex state_idx, FunctionalDependencySet fds) const
 
bool DoesFollowOrder (StateIndex state_idx, int ordering_idx) const
 
bool MoreOrderedThan (StateIndex a_idx, StateIndex b_idx, std::bitset< kMaxSupportedOrderings > ignored_orderings) const
 

Private Member Functions

bool ItemHandleBeforeInGroup (ItemHandle a, ItemHandle b)
 
bool ItemBeforeInGroup (const OrderElement &a, const OrderElement &b)
 
int AddOrderingInternal (THD *thd, Ordering order, OrderingWithInfo::Type type, bool used_at_end, table_map homogenize_tables)
 
void PruneUninterestingOrders (THD *thd)
 Try to get rid of uninteresting orders, possibly by discarding irrelevant suffixes and merging them with others. More...
 
void PruneFDs (THD *thd)
 
bool ImpliedByEarlierElements (ItemHandle item, Ordering prefix, bool all_fds) const
 Checks whether the given item is redundant given previous elements in the ordering; ie., whether adding it will never change the ordering. More...
 
void BuildEquivalenceClasses ()
 
void RecanonicalizeGroupings ()
 
void AddFDsFromComputedItems (THD *thd)
 Try to add new FDs from items that are not base items; e.g., if we have an item (a + 1), we add {a} → (a + 1) (since addition is deterministic). More...
 
void AddFDsFromAggregateItems (THD *thd)
 
Bounds_checked_array< ItemHandleCollectHeadForStaticWindowFunction (THD *thd, ItemHandle argument_item, Window *window)
 
void AddFDsFromConstItems (THD *thd)
 Try to add FDs from items that are constant by themselves, e.g. More...
 
void FindElementsThatCanBeAddedByFDs ()
 
Ordering ReduceOrdering (Ordering ordering, bool all_fds, OrderElement *tmpbuf) const
 Remove redundant elements using the functional dependencies that we have, to give a more canonical form before homogenization. More...
 
void PreReduceOrderings (THD *thd)
 Do safe reduction on all orderings (some of them may get merged by PruneUninterestingOrders() later), ie., remove all items that may be removed using only FDs that always are active. More...
 
void CreateOrderingsFromGroupings (THD *thd)
 We don't currently have any operators that only group and do not sort (e.g. More...
 
void CreateHomogenizedOrderings (THD *thd)
 For each interesting ordering, see if we can homogenize it onto each table. More...
 
void AddHomogenizedOrderingIfPossible (THD *thd, Ordering reduced_ordering, bool used_at_end, int table_idx, Bounds_checked_array< std::pair< ItemHandle, ItemHandle > > reverse_canonical, OrderElement *tmpbuf)
 Helper function for CreateHomogenizedOrderings(). More...
 
bool CouldBecomeInterestingOrdering (Ordering ordering) const
 For a given ordering, check whether it ever has the hope of becoming an interesting ordering. More...
 
void BuildNFSM (THD *thd)
 
void AddGroupingFromOrdering (THD *thd, int state_idx, Ordering ordering, OrderElement *tmpbuf)
 
void TryAddingOrderWithElementInserted (THD *thd, int state_idx, int fd_idx, Ordering old_ordering, size_t start_point, ItemHandle item_to_add, enum_order direction, OrderElement *tmpbuf)
 
void PruneNFSM (THD *thd)
 Try to prune away irrelevant nodes from the NFSM; it is worth spending some time on this, since the number of NFSM states can explode the size of the DFSM. More...
 
bool AlwaysActiveFD (int fd_idx)
 
void FinalizeDFSMState (THD *thd, int state_idx)
 
void ExpandThroughAlwaysActiveFDs (Mem_root_array< int > *nfsm_states, int *generation, int extra_allowed_fd_idx)
 
void ConvertNFSMToDFSM (THD *thd)
 From the NFSM, convert an equivalent DFSM. More...
 
void FindInitialStatesForOrdering ()
 
int AddArtificialState (THD *thd, Ordering ordering)
 
void AddEdge (THD *thd, int state_idx, int required_fd_idx, Ordering ordering)
 
bool FunctionalDependencyApplies (const FunctionalDependency &fd, const Ordering ordering, int *start_point) const
 
std::string PrintOrdering (Ordering ordering) const
 
std::string PrintFunctionalDependency (const FunctionalDependency &fd, bool html) const
 
void PrintFunctionalDependencies (std::string *trace)
 
void PrintInterestingOrders (std::string *trace)
 
void PrintNFSMDottyGraph (std::string *trace) const
 
void PrintDFSMDottyGraph (std::string *trace) const
 

Private Attributes

bool m_built = false
 
Mem_root_array< ItemInfom_items
 
Bounds_checked_array< ItemHandlem_aggregate_head
 
bool m_rollup = false
 
Mem_root_array< OrderingWithInfom_orderings
 
int m_longest_ordering = 0
 
Mem_root_array< FunctionalDependencym_fds
 
Mem_root_array< NFSMStatem_states
 
Mem_root_array< NFSMEdgem_edges
 
Mem_root_array< DFSMStatem_dfsm_states
 
Mem_root_array< DFSMEdgem_dfsm_edges
 
Bounds_checked_array< int > m_optimized_ordering_mapping
 
Bounds_checked_array< int > m_optimized_fd_mapping
 

Member Typedef Documentation

◆ StateIndex

Constructor & Destructor Documentation

◆ LogicalOrderings()

LogicalOrderings::LogicalOrderings ( THD thd)
explicit

Member Function Documentation

◆ AddArtificialState()

int LogicalOrderings::AddArtificialState ( THD thd,
Ordering  ordering 
)
private

◆ AddEdge()

void LogicalOrderings::AddEdge ( THD thd,
int  state_idx,
int  required_fd_idx,
Ordering  ordering 
)
private

◆ AddFDsFromAggregateItems()

void LogicalOrderings::AddFDsFromAggregateItems ( THD thd)
private

◆ AddFDsFromComputedItems()

void LogicalOrderings::AddFDsFromComputedItems ( THD thd)
private

Try to add new FDs from items that are not base items; e.g., if we have an item (a + 1), we add {a} → (a + 1) (since addition is deterministic).

This can help reducing orderings that are on such derived items. For simplicity, we only bother doing this for items that derive from a single base field; i.e., from (a + b), we don't add {a,b} → (a + b) even though we could. Also note that these are functional dependencies, not equivalences; even though ORDER BY (a + 1) could be satisfied by an ordering on (a) (barring overflow issues), this does not hold in general, e.g. ORDER BY (-a) is not satisfied by an ordering on (a), not to mention ORDER BY (a*a). We do not have the framework in Item to understand which functions are monotonous, so we do not attempt to create equivalences.

This is really the only the case where we can get transitive FDs that are not equivalences. Since our approach does not apply FDs transitively without adding the intermediate item (e.g., for {a} → b and {b} → c, we won't extend (a) to (ac), only to (abc)), we extend any existing FDs here when needed.

◆ AddFDsFromConstItems()

void LogicalOrderings::AddFDsFromConstItems ( THD thd)
private

Try to add FDs from items that are constant by themselves, e.g.

if someone does ORDER BY 'x', add a new FD {} → 'x' so that the ORDER BY can be elided.

TODO(sgunders): This can potentially remove subqueries or other functions that would throw errors if actually executed, potentially modifying semantics. See if that is illegal, and thus, if we need to test-execute them at least once somehow (ideally not during optimization).

◆ AddFunctionalDependency()

int LogicalOrderings::AddFunctionalDependency ( THD thd,
FunctionalDependency  fd 
)

◆ AddGroupingFromOrdering()

void LogicalOrderings::AddGroupingFromOrdering ( THD thd,
int  state_idx,
Ordering  ordering,
OrderElement tmpbuf 
)
private

◆ AddHomogenizedOrderingIfPossible()

void LogicalOrderings::AddHomogenizedOrderingIfPossible ( THD thd,
Ordering  reduced_ordering,
bool  used_at_end,
int  table_idx,
Bounds_checked_array< std::pair< ItemHandle, ItemHandle > >  reverse_canonical,
OrderElement tmpbuf 
)
private

Helper function for CreateHomogenizedOrderings().

◆ AddOrdering()

int LogicalOrderings::AddOrdering ( THD thd,
Ordering  order,
bool  interesting,
bool  used_at_end,
table_map  homogenize_tables 
)
inline

◆ AddOrderingInternal()

int LogicalOrderings::AddOrderingInternal ( THD thd,
Ordering  order,
OrderingWithInfo::Type  type,
bool  used_at_end,
table_map  homogenize_tables 
)
private

◆ AlwaysActiveFD()

bool LogicalOrderings::AlwaysActiveFD ( int  fd_idx)
private

◆ ApplyFDs()

LogicalOrderings::StateIndex LogicalOrderings::ApplyFDs ( LogicalOrderings::StateIndex  state_idx,
FunctionalDependencySet  fds 
) const

◆ Build()

void LogicalOrderings::Build ( THD thd,
std::string *  trace 
)

◆ BuildEquivalenceClasses()

void LogicalOrderings::BuildEquivalenceClasses ( )
private

◆ BuildNFSM()

void LogicalOrderings::BuildNFSM ( THD thd)
private

◆ CollectHeadForStaticWindowFunction()

Bounds_checked_array< ItemHandle > LogicalOrderings::CollectHeadForStaticWindowFunction ( THD thd,
ItemHandle  argument_item,
Window window 
)
private

◆ ConvertNFSMToDFSM()

void LogicalOrderings::ConvertNFSMToDFSM ( THD thd)
private

From the NFSM, convert an equivalent DFSM.

This is by means of the so-called powerset conversion, which is more commonly used to convert NFAs to DFAs. (The only real difference is that FAs have accepting states, while our FSM instead needs to store information about constituent interesting order states.)

The powerset algorithm works by creating DFSM states that represent sets of NFSM states we could be in. E.g., if we have a state (a) and an FD {} → x can lead to new states () (ϵ-edge), (a) (implicit self-edge), (x), (ax), (xa), then we create a single new DFSM state that represent all those five states, and an {} → x edge from {(a)} to that new state. When creating edges from such superstates, we need to follow that FD from all of them, so the list of constituent states can be fairly large.

In theory, we could get 2^n DFSM states from n NFSM states, but in practice, we get fewer since our orderings generally only increase, not decrease. We only generate DFSM states by following FDs from the initial NFSM state; we don't create states eagerly for all 2^n possibilities.

When creating DFSM states, we always include states that can be reached by means of always-active FDs. The ϵ edge (drop the last element from the ordering) is always active, and the client can also mark others as such. This means we get fewer DFSM states and fewer FDs to follow. See FunctionalDependency::always_active.

◆ CouldBecomeInterestingOrdering()

bool LogicalOrderings::CouldBecomeInterestingOrdering ( Ordering  ordering) const
private

For a given ordering, check whether it ever has the hope of becoming an interesting ordering.

In its base form, this is a prefix check; if we have an ordering (a,b) and an interesting order (a,b,c), it passes. However, we add some slightly more lax heuristics in order to make the graph a bit wider at build time (and thus require fewer FD applications at runtime); namely, if there's a prefix mismatch but the item could be added by some FD later (without the ordering becoming too long), we let it slide and just skip that item.

E.g.: If we have an ordering (a,b) and an interesting order (a,x,b), we first match a. x does not match b, but we check whether x is ever on the right side of any FD (for instance because there might be an FD a → x). If it is, we skip it and match b with b. There's an example of this in the DoesNotStrictlyPruneOnPrefixes unit test.

Obviously, this leads to false positives, but that is fine; this is just to prune down the amount of states in the NFSM. [Neu04] points out that such pruning is pretty much essential for performance, and our experience is the same.

There is one extra quirk; the prefix check needs to take equivalences into account, or we would prune away orderings that could become interesting after equivalences. We solve this by always mapping to an equivalence class when doing the prefix comparison. There's an example of this in the TwoEquivalences unit test.

◆ CreateHomogenizedOrderings()

void LogicalOrderings::CreateHomogenizedOrderings ( THD thd)
private

For each interesting ordering, see if we can homogenize it onto each table.

A homogenized ordering is one that refers to fewer tables than the original one – in our case, a single table. (If we wanted to, we could homogenize down to sets of tables instead of single tables only. However, that would open up for O(2^n) orderings, so we restrict to single-table.)

The idea is to enable sort-ahead; find an ordering we can sort a single table in that, after later applying functional dependencies, eventually gives the desired ordering. This is just a heuristic (in particular, we only consider equivalences, not other functional dependencies), but in most cases will give us an ordering if any exist.

Neumann et al do not talk much about this, so this comes from the Simmen paper, where it is called “Homogenize Order”.

◆ CreateOrderingsFromGroupings()

void LogicalOrderings::CreateOrderingsFromGroupings ( THD thd)
private

We don't currently have any operators that only group and do not sort (e.g.

hash grouping), so we always implement grouping by sorting. This function makes that representation explicit – for each grouping, it will make sure there is at least one ordering representing that grouping. This means we never need to “sort by a grouping”, which would destroy ordering information that could be useful later.

As an example, take SELECT ... GROUP BY a, b ORDER BY a. This needs to group first by {a,b} (assume we're using filesort, not an index), then sort by (a). If we just represent the sort we're doing as going directly to {a,b}, we can't elide the sort on (a). Instead, we create a sort (a,b) (implicitly convertible to {a,b}), which makes the FSM understand that we're both sorted on (a,b) and grouped on {a,b}, and then also sorted on (a).

Any given grouping would be satisfied by lots of different orderings: {a,b} could be (a,b), (b,a), (a DESC, b) etc.. We look through all interesting orders that are a subset of our grouping, and if they are, we extend them arbitrarily to complete the grouping. E.g., if our grouping is {a,b,c,d} and the ordering (c DESC, b) is interesting, we make a homogenized ordering (c DESC, b, a, d). This is roughly equivalent to Simmen's “Cover Order” procedure. If we cannot make such a cover, we simply make a new last-resort ordering (a,b,c,d).

We don't consider equivalences here; perhaps we should, at least for at-end groupings.

◆ DoesFollowOrder()

bool LogicalOrderings::DoesFollowOrder ( StateIndex  state_idx,
int  ordering_idx 
) const
inline

◆ ExpandThroughAlwaysActiveFDs()

void LogicalOrderings::ExpandThroughAlwaysActiveFDs ( Mem_root_array< int > *  nfsm_states,
int *  generation,
int  extra_allowed_fd_idx 
)
private

◆ FinalizeDFSMState()

void LogicalOrderings::FinalizeDFSMState ( THD thd,
int  state_idx 
)
private

◆ FindElementsThatCanBeAddedByFDs()

void LogicalOrderings::FindElementsThatCanBeAddedByFDs ( )
private

◆ FindInitialStatesForOrdering()

void LogicalOrderings::FindInitialStatesForOrdering ( )
private

◆ FunctionalDependencyApplies()

bool LogicalOrderings::FunctionalDependencyApplies ( const FunctionalDependency fd,
const Ordering  ordering,
int *  start_point 
) const
private

◆ GetFDSet()

FunctionalDependencySet LogicalOrderings::GetFDSet ( int  fd_idx) const
inline

◆ GetHandle()

ItemHandle LogicalOrderings::GetHandle ( Item item)

◆ ImpliedByEarlierElements()

bool LogicalOrderings::ImpliedByEarlierElements ( ItemHandle  item,
Ordering  prefix,
bool  all_fds 
) const
private

Checks whether the given item is redundant given previous elements in the ordering; ie., whether adding it will never change the ordering.

This could either be because it's a duplicate, or because it is implied by functional dependencies. When this is applied to all elements in turn, it is called “reducing” the ordering. [Neu04] claims that this operation is not confluent, which is erroneous (their example is faulty, ignoring that Simmen reduces from the back). [Neu04b] has modified the claim to be that it is not confluent for groupings, which is correct. We make no attempt at optimality.

If all_fds is true, we consider all functional dependencies, including those that may not always be active; e.g. a FD a=b may come from a join, and thus does not hold before the join is actually done, but we assume it holds anyway. This is OK from order homogenization, which is concerned with making orderings that will turn into the desired interesting ordering (e.g. for ORDER BY) only after all joins have been done. It would not be OK if we were to use it for merge joins somehow.

◆ item()

Item * LogicalOrderings::item ( ItemHandle  item) const
inline

◆ ItemBeforeInGroup()

bool LogicalOrderings::ItemBeforeInGroup ( const OrderElement a,
const OrderElement b 
)
inlineprivate

◆ ItemHandleBeforeInGroup()

bool LogicalOrderings::ItemHandleBeforeInGroup ( ItemHandle  a,
ItemHandle  b 
)
inlineprivate

◆ MoreOrderedThan()

bool LogicalOrderings::MoreOrderedThan ( StateIndex  a_idx,
StateIndex  b_idx,
std::bitset< kMaxSupportedOrderings ignored_orderings 
) const
inline

◆ num_fds()

int LogicalOrderings::num_fds ( ) const
inline

◆ num_orderings()

int LogicalOrderings::num_orderings ( ) const
inline

◆ ordering()

Ordering LogicalOrderings::ordering ( int  ordering_idx) const
inline

◆ ordering_is_relevant_for_sortahead()

bool LogicalOrderings::ordering_is_relevant_for_sortahead ( int  ordering_idx) const
inline

◆ PreReduceOrderings()

void LogicalOrderings::PreReduceOrderings ( THD thd)
private

Do safe reduction on all orderings (some of them may get merged by PruneUninterestingOrders() later), ie., remove all items that may be removed using only FDs that always are active.

There's a problem in [Neu04] that is never adequately addressed; orderings are only ever expanded, and then eventually compared against interesting orders. But the interesting order itself is not necessarily extended, due to pruning. For instance, if an index could yield (x,y) and we have {} → x, there's no way we could get it to match the interesting order (y) even though they are logically equivalent. For an even trickier case, imagine an index (x,y) and an interesting order (y,z), with {} → x and y → z. For this to match, we'd need to have a “super-order” (x,y,z) and infer that from both orderings.

Instead, we do a pre-step related to Simmen's “Test Ordering” procedure; we reduce the orderings. In the example above, both will be reduced to (y), and then match. This is mostly a band-aid around the problem; for instance, it cannot deal with FDs that are not always active, and it does not deal adequately with groupings (since reduction does not).

Note that this could make the empty ordering interesting after merging.

◆ PrintDFSMDottyGraph()

void LogicalOrderings::PrintDFSMDottyGraph ( std::string *  trace) const
private

◆ PrintFunctionalDependencies()

void LogicalOrderings::PrintFunctionalDependencies ( std::string *  trace)
private

◆ PrintFunctionalDependency()

string LogicalOrderings::PrintFunctionalDependency ( const FunctionalDependency fd,
bool  html 
) const
private

◆ PrintInterestingOrders()

void LogicalOrderings::PrintInterestingOrders ( std::string *  trace)
private

◆ PrintNFSMDottyGraph()

void LogicalOrderings::PrintNFSMDottyGraph ( std::string *  trace) const
private

◆ PrintOrdering()

string LogicalOrderings::PrintOrdering ( Ordering  ordering) const
private

◆ PruneFDs()

void LogicalOrderings::PruneFDs ( THD thd)
private

◆ PruneNFSM()

void LogicalOrderings::PruneNFSM ( THD thd)
private

Try to prune away irrelevant nodes from the NFSM; it is worth spending some time on this, since the number of NFSM states can explode the size of the DFSM.

Like with PruneFDs(), we don't do any of the pruning described in [Neu04]; it is unclear exactly what is meant, but it would seem the state removal/merging there is either underdefined or simply does not do anything except remove trivially bad nodes (those that cannot reach anything).

This also sets the can_reach_interesting_order bitmap on each NFSM node.

◆ PruneUninterestingOrders()

void LogicalOrderings::PruneUninterestingOrders ( THD thd)
private

Try to get rid of uninteresting orders, possibly by discarding irrelevant suffixes and merging them with others.

In a typical query, this removes a large amount of index-created orderings that will never get to something interesting, reducing the end FSM size (and thus, reducing the number of different access paths we have to keep around).

This step is the only one that can move orderings around, and thus also populates m_optimized_ordering_mapping.

◆ RecanonicalizeGroupings()

void LogicalOrderings::RecanonicalizeGroupings ( )
private

◆ ReduceOrdering()

Ordering LogicalOrderings::ReduceOrdering ( Ordering  ordering,
bool  all_fds,
OrderElement tmpbuf 
) const
private

Remove redundant elements using the functional dependencies that we have, to give a more canonical form before homogenization.

Note that we assume here that every functional dependency holds, so this is not applicable generally throughout the tree, only at the end (e.g. final ORDER BY). This is called “Reduce Order” in the Simmen paper.

tmpbuf is used as the memory store for the new ordering.

◆ RemapOrderingIndex()

int LogicalOrderings::RemapOrderingIndex ( int  ordering_idx) const
inline

◆ SetHeadForAggregates()

void LogicalOrderings::SetHeadForAggregates ( Bounds_checked_array< ItemHandle head)
inline

◆ SetOrder()

StateIndex LogicalOrderings::SetOrder ( int  ordering_idx) const
inline

◆ SetRollup()

void LogicalOrderings::SetRollup ( bool  rollup)
inline

◆ TryAddingOrderWithElementInserted()

void LogicalOrderings::TryAddingOrderWithElementInserted ( THD thd,
int  state_idx,
int  fd_idx,
Ordering  old_ordering,
size_t  start_point,
ItemHandle  item_to_add,
enum_order  direction,
OrderElement tmpbuf 
)
private

Member Data Documentation

◆ m_aggregate_head

Bounds_checked_array<ItemHandle> LogicalOrderings::m_aggregate_head
private

◆ m_built

bool LogicalOrderings::m_built = false
private

◆ m_dfsm_edges

Mem_root_array<DFSMEdge> LogicalOrderings::m_dfsm_edges
private

◆ m_dfsm_states

Mem_root_array<DFSMState> LogicalOrderings::m_dfsm_states
private

◆ m_edges

Mem_root_array<NFSMEdge> LogicalOrderings::m_edges
private

◆ m_fds

Mem_root_array<FunctionalDependency> LogicalOrderings::m_fds
private

◆ m_items

Mem_root_array<ItemInfo> LogicalOrderings::m_items
private

◆ m_longest_ordering

int LogicalOrderings::m_longest_ordering = 0
private

◆ m_optimized_fd_mapping

Bounds_checked_array<int> LogicalOrderings::m_optimized_fd_mapping
private

◆ m_optimized_ordering_mapping

Bounds_checked_array<int> LogicalOrderings::m_optimized_ordering_mapping
private

◆ m_orderings

Mem_root_array<OrderingWithInfo> LogicalOrderings::m_orderings
private

◆ m_rollup

bool LogicalOrderings::m_rollup = false
private

◆ m_states

Mem_root_array<NFSMState> LogicalOrderings::m_states
private

The documentation for this class was generated from the following files: