MySQL  8.0.19
Source Code Documentation
Query Executor

Classes

struct  PendingInvalidator
 Similar to PendingCondition, but for cache invalidator iterators. More...
 

Enumerations

enum  CallingContext { TOP_LEVEL, DIRECTLY_UNDER_SEMIJOIN, DIRECTLY_UNDER_OUTER_JOIN, DIRECTLY_UNDER_WEEDOUT }
 
enum  Substructure { Substructure::NONE, Substructure::OUTER_JOIN, Substructure::SEMIJOIN, Substructure::WEEDOUT }
 

Functions

static void return_zero_rows (JOIN *join, List< Item > &fields)
 For some reason, e.g. More...
 
static int do_select (JOIN *join)
 Make a join of all tables and write it on socket or to table. More...
 
static enum_nested_loop_state evaluate_join_record (JOIN *join, QEP_TAB *const qep_tab)
 Process one row of the nested loop join. More...
 
static enum_nested_loop_state evaluate_null_complemented_join_record (JOIN *join, QEP_TAB *qep_tab)
 
static enum_nested_loop_state end_send (JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
 
static enum_nested_loop_state end_write (JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
 
static enum_nested_loop_state end_write_wf (JOIN *join, QEP_TAB *const qep_tab, bool end_of_records)
 Similar to end_write, but used in the windowing tmp table steps. More...
 
static enum_nested_loop_state end_update (JOIN *join, QEP_TAB *const qep_tab, bool end_of_records)
 Group by searching after group record and updating it if possible. More...
 
static int read_system (TABLE *table)
 Read a constant table when there is at most one matching row, using a table scan. More...
 
static int read_const (TABLE *table, TABLE_REF *ref)
 
static bool remove_dup_with_compare (THD *thd, TABLE *entry, Field **field, ulong offset, Item *having)
 
static bool remove_dup_with_hash_index (THD *thd, TABLE *table, Field **first_field, const size_t *field_lengths, size_t key_length, Item *having)
 Generate a hash index for each row to quickly find duplicate rows. More...
 
static int do_sj_reset (SJ_TMP_TABLE *sj_tbl)
 SemiJoinDuplicateElimination: Reset the temporary table. More...
 
static bool alloc_group_fields (JOIN *join, ORDER *group)
 Get a list of buffers for saving the last group. More...
 
static void SetCostOnTableIterator (const Cost_model_server &cost_model, const POSITION *pos, bool is_after_filter, RowIterator *iterator)
 
static bool having_is_true (Item *h)
 Evaluates HAVING condition. More...
 
string RefToString (const TABLE_REF &ref, const KEY *key, bool include_nulls)
 
bool has_rollup_result (Item *item)
 Checks if an item has a ROLLUP NULL which needs to be written to temp table. More...
 
bool prepare_sum_aggregators (Item_sum **func_ptr, bool need_distinct)
 
bool setup_sum_funcs (THD *thd, Item_sum **func_ptr)
 Call setup() for all sum functions. More...
 
void init_tmptable_sum_functions (Item_sum **func_ptr)
 
void update_tmptable_sum_func (Item_sum **func_ptr, TABLE *tmp_table)
 Update record 0 in tmp_table from record 1. More...
 
void copy_sum_funcs (Item_sum **func_ptr, Item_sum **end_ptr)
 Copy result of sum functions to record in tmp_table. More...
 
bool init_sum_functions (Item_sum **func_ptr, Item_sum **end_ptr)
 
bool update_sum_func (Item_sum **func_ptr)
 
bool copy_funcs (Temp_table_param *param, const THD *thd, Copy_func_type type)
 Copy result of functions to record in tmp_table. More...
 
static enum_nested_loop_state end_sj_materialize (JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
 
static bool update_const_equal_items (THD *thd, Item *cond, JOIN_TAB *tab)
 Check appearance of new constant items in multiple equalities of a condition after reading a constant table. More...
 
void setup_tmptable_write_func (QEP_TAB *tab, Opt_trace_object *trace)
 Setup write_func of QEP_tmp_table object. More...
 
static size_t record_prefix_size (const QEP_TAB *qep_tab)
 Find out how many bytes it takes to store the smallest prefix which covers all the columns that will be read from a table. More...
 
bool set_record_buffer (const QEP_TAB *tab)
 Allocate a data buffer that the storage engine can use for fetching batches of records. More...
 
static void ExtractConditions (Item *condition, vector< Item * > *condition_parts)
 Split AND conditions into their constituent parts, recursively. More...
 
unique_ptr_destroy_only< RowIteratorPossiblyAttachFilterIterator (unique_ptr_destroy_only< RowIterator > iterator, const vector< Item * > &conditions, THD *thd)
 Return a new iterator that wraps "iterator" and that tests all of the given conditions (if any), ANDed together. More...
 
unique_ptr_destroy_only< RowIteratorCreateNestedLoopIterator (THD *thd, unique_ptr_destroy_only< RowIterator > left_iterator, unique_ptr_destroy_only< RowIterator > right_iterator, JoinType join_type, bool pfs_batch_mode)
 
static unique_ptr_destroy_only< RowIteratorCreateInvalidatorIterator (THD *thd, QEP_TAB *qep_tab, unique_ptr_destroy_only< RowIterator > iterator)
 
static unique_ptr_destroy_only< RowIteratorPossiblyAttachFilterIterator (unique_ptr_destroy_only< RowIterator > iterator, const vector< PendingCondition > &conditions, THD *thd)
 
static Item_func_trig_condGetTriggerCondOrNull (Item *item)
 
void ConvertItemsToCopy (List< Item > *items, Field **fields, Temp_table_param *param, JOIN *join)
 For historical reasons, derived table materialization and temporary table materialization didn't specify the fields to materialize in the same way. More...
 
void SplitConditions (Item *condition, vector< Item * > *predicates_below_join, vector< PendingCondition > *predicates_above_join)
 
static void MarkUnhandledDuplicates (SJ_TMP_TABLE *weedout, plan_idx weedout_start, plan_idx weedout_end, qep_tab_map *unhandled_duplicates)
 For a given duplicate weedout operation, figure out which tables are supposed to be deduplicated by it, and add those to unhandled_duplicates. More...
 
static unique_ptr_destroy_only< RowIteratorCreateWeedoutIterator (THD *thd, unique_ptr_destroy_only< RowIterator > iterator, SJ_TMP_TABLE *weedout_table)
 
static unique_ptr_destroy_only< RowIteratorCreateWeedoutIteratorForTables (THD *thd, const qep_tab_map tables_to_deduplicate, QEP_TAB *qep_tabs, uint primary_tables, unique_ptr_destroy_only< RowIterator > iterator)
 
static Substructure FindSubstructure (QEP_TAB *qep_tabs, const plan_idx first_idx, const plan_idx this_idx, const plan_idx last_idx, CallingContext calling_context, bool *add_limit_1, plan_idx *substructure_end, qep_tab_map *unhandled_duplicates)
 Given a range of tables (where we assume that we've already handled first_idx..(this_idx-1) as inner joins), figure out whether this is a semijoin, an outer join or a weedout. More...
 
unique_ptr_destroy_only< RowIteratorGetTableIterator (THD *thd, QEP_TAB *qep_tab, QEP_TAB *qep_tabs)
 Get the RowIterator used for scanning the given table, with any required materialization operations done first. More...
 
void SetCostOnNestedLoopIterator (const Cost_model_server &cost_model, const POSITION *pos_right, RowIterator *iterator)
 
void SetCostOnHashJoinIterator (const Cost_model_server &cost_model, const POSITION *pos_right, RowIterator *iterator)
 
static void ExtractHashJoinConditions (const QEP_TAB *current_table, qep_tab_map left_tables, vector< Item * > *predicates, vector< Item_func_eq * > *hash_join_conditions, vector< Item * > *conditions_after_hash_join)
 
static unique_ptr_destroy_only< RowIteratorConnectJoins (plan_idx first_idx, plan_idx last_idx, QEP_TAB *qep_tabs, THD *thd, CallingContext calling_context, vector< PendingCondition > *pending_conditions, vector< PendingInvalidator > *pending_invalidators, qep_tab_map *unhandled_duplicates)
 For a given slice of the table list, build up the iterator tree corresponding to the tables in that slice. More...
 
static int ExecuteIteratorQuery (JOIN *join)
 
enum_nested_loop_state sub_select_op (JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
 Accumulate full or partial join result in operation and send operation's result further. More...
 
enum_nested_loop_state sub_select (JOIN *join, QEP_TAB *const qep_tab, bool end_of_records)
 Retrieve records ends with a given beginning from the result of a join. More...
 
int do_sj_dups_weedout (THD *thd, SJ_TMP_TABLE *sjtbl)
 SemiJoinDuplicateElimination: Weed out duplicate row combinations. More...
 
int report_handler_error (TABLE *table, int error)
 Helper function for when we get an error from the table handler. More...
 
static bool init_index_and_record_buffer (const QEP_TAB *qep_tab, handler *file, uint idx, bool sorted)
 Initialize an index scan and the record buffer to use in the scan. More...
 
int safe_index_read (QEP_TAB *tab)
 
int join_read_const_table (JOIN_TAB *tab, POSITION *pos)
 Reads content of constant table. More...
 
void join_setup_iterator (QEP_TAB *tab)
 Prepare table for reading rows and read first record. More...
 
int join_materialize_table_function (QEP_TAB *tab)
 
int join_materialize_derived (QEP_TAB *tab)
 
int join_materialize_semijoin (QEP_TAB *tab)
 
ulonglong get_exact_record_count (QEP_TAB *qep_tab, uint table_count, int *error)
 Get exact count of rows in all tables. More...
 
enum_nested_loop_state end_send_count (JOIN *join, QEP_TAB *qep_tab)
 
enum_nested_loop_state end_send_group (JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
 
static bool cmp_field_value (Field *field, ptrdiff_t diff)
 
static bool group_rec_cmp (ORDER *group, uchar *rec0, uchar *rec1)
 Compare GROUP BY fields from the tmp table's record[0] and record[1]. More...
 
static bool table_rec_cmp (TABLE *table)
 Compare GROUP BY fields from the tmp table's record[0] and record[1]. More...
 
ulonglong unique_hash (const Field *field, ulonglong *hash_val)
 Generate hash for a field. More...
 
static ulonglong unique_hash_group (ORDER *group)
 Generate hash for unique constraint according to group-by list. More...
 
static ulonglong unique_hash_fields (TABLE *table)
 
bool check_unique_constraint (TABLE *table)
 Check unique_constraint. More...
 
static void reset_wf_states (Func_ptr_array *func_ptr, bool framing)
 Minion for reset_framing_wf_states and reset_non_framing_wf_state, q.v. More...
 
static void reset_framing_wf_states (Func_ptr_array *func_ptr)
 Walk the function calls and reset any framing window function's window state. More...
 
static void reset_non_framing_wf_state (Func_ptr_array *func_ptr)
 Walk the function calls and reset any non-framing window function's window state. More...
 
static bool buffer_record_somewhere (THD *thd, Window *w, int64 rowno)
 Save a window frame buffer to frame buffer temporary table. More...
 
bool buffer_windowing_record (THD *thd, Temp_table_param *param, bool *new_partition)
 If we cannot evaluate all window functions for a window on the fly, buffer the current row for later processing by process_buffered_windowing_record. More...
 
static bool read_frame_buffer_row (int64 rowno, Window *w, bool for_nth_value)
 Read row rowno from frame buffer tmp file using cached row positions to minimize positioning work. More...
 
static void dbug_allow_write_all_columns (Temp_table_param *param, std::map< TABLE *, my_bitmap_map * > &map)
 
static void dbug_restore_all_columns (std::map< TABLE *, my_bitmap_map * > &map)
 
bool bring_back_frame_row (THD *thd, Window *w, Temp_table_param *out_param, int64 rowno, Window_retrieve_cached_row_reason reason, int fno)
 Bring back buffered data to the record of qep_tab-1 [1], and optionally execute copy_fields() to the OUT table. More...
 
static bool process_wfs_needing_card (THD *thd, Temp_table_param *param, const Window::st_nth &have_nth_value, const Window::st_lead_lag &have_lead_lag, const int64 current_row, Window *w, Window_retrieve_cached_row_reason current_row_reason)
 Process window functions that need partition cardinality. More...
 
bool process_buffered_windowing_record (THD *thd, Temp_table_param *param, const bool new_partition_or_eof, bool *output_row_ready)
 While there are more unprocessed rows ready to process given the current partition/frame state, process such buffered rows by evaluating/aggregating the window functions defined over this window on the current frame, moving the frame if required. More...
 
static enum_nested_loop_state write_or_send_row (JOIN *join, QEP_TAB *const qep_tab, TABLE *const table, Temp_table_param *const out_tbl)
 The last step in a series of windows do not need to write a tmp file if both a) and b) holds: More...
 
enum_nested_loop_state end_write_group (JOIN *join, QEP_TAB *const qep_tab, bool end_of_records)
 
static bool compare_record (TABLE *table, Field **ptr)
 
static bool copy_blobs (Field **ptr)
 
static void free_blobs (Field **ptr)
 
static size_t compute_field_lengths (Field **first_field, size_t *field_lengths)
 For a set of fields, compute how many bytes their respective sort keys need. More...
 
bool construct_lookup_ref (THD *thd, TABLE *table, TABLE_REF *ref)
 Copy the lookup key into the table ref's key buffer. More...
 
bool make_group_fields (JOIN *main_join, JOIN *curr_join)
 allocate group fields or take prepared (cached). More...
 
int update_item_cache_if_changed (List< Cached_item > &list)
 
bool setup_copy_fields (List< Item > &all_fields, size_t num_select_elements, THD *thd, Temp_table_param *param, Ref_item_array ref_item_array, List< Item > *res_selected_fields, List< Item > *res_all_fields)
 Sets up caches for holding the values of non-aggregated expressions. More...
 
bool copy_fields (Temp_table_param *param, const THD *thd, bool reverse_copy)
 Make a copy of all simple SELECT'ed fields. More...
 
bool copy_fields_and_funcs (Temp_table_param *param, const THD *thd, Copy_func_type type)
 
bool change_to_use_tmp_fields (List< Item > &all_fields, size_t num_select_elements, THD *thd, Ref_item_array ref_item_array, List< Item > *res_selected_fields, List< Item > *res_all_fields)
 Change all funcs and sum_funcs to fields in tmp table, and create new list of all items. More...
 
bool change_refs_to_tmp_fields (List< Item > &all_fields, size_t num_select_elements, THD *thd, Ref_item_array ref_item_array, List< Item > *res_selected_fields, List< Item > *res_all_fields)
 Change all sum_func refs to fields to point at fields in tmp table. More...
 
void JOIN::exec ()
 Execute select, executor entry point. More...
 
bool JOIN::create_intermediate_table (QEP_TAB *tab, List< Item > *tmp_table_fields, ORDER_with_src &tmp_table_group, bool save_sum_fields)
 Create a temporary table to be used for processing DISTINCT/ORDER BY/GROUP BY. More...
 
bool JOIN::rollup_send_data (uint idx)
 Send all rollup levels higher than the current one to the client. More...
 
bool JOIN::rollup_write_data (uint idx, QEP_TAB *qep_tab)
 Write all rollup levels higher than the current one to a temp table. More...
 
void JOIN::optimize_distinct ()
 Optimize distinct when used on a subset of the tables. More...
 
Next_select_func JOIN::get_end_select_func ()
 
void JOIN::create_iterators ()
 If possible, convert the executor structures to a set of row iterators, storing the result in m_root_iterator. More...
 
void QEP_TAB::refresh_lateral ()
 Instructs each lateral derived table depending on this QEP_TAB, to rematerialize itself before emitting rows. More...
 
bool QEP_TAB::prepare_scan ()
 Prepare table to be scanned. More...
 
 ConstIterator::ConstIterator (THD *thd, TABLE *table, TABLE_REF *table_ref, ha_rows *examined_rows)
 
bool ConstIterator::Init () override
 Initialize or reinitialize the iterator. More...
 
int ConstIterator::Read () override
 Read a constant table when there is at most one matching row, using an index lookup. More...
 
std::vector< std::string > ConstIterator::DebugString () const override
 Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator. More...
 
 EQRefIterator::EQRefIterator (THD *thd, TABLE *table, TABLE_REF *ref, bool use_order, ha_rows *examined_rows)
 
bool EQRefIterator::Init () override
 Read row using unique key: eq_ref access method implementation. More...
 
int EQRefIterator::Read () override
 Read row using unique key: eq_ref access method implementation. More...
 
void EQRefIterator::UnlockRow () override
 Since EQRefIterator may buffer a record, do not unlock it if it was not used in this invocation of EQRefIterator::Read(). More...
 
std::vector< std::string > EQRefIterator::DebugString () const override
 Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator. More...
 
 PushedJoinRefIterator::PushedJoinRefIterator (THD *thd, TABLE *table, TABLE_REF *ref, bool use_order, ha_rows *examined_rows)
 
bool PushedJoinRefIterator::Init () override
 Initialize or reinitialize the iterator. More...
 
int PushedJoinRefIterator::Read () override
 Read a single row. More...
 
std::vector< std::string > PushedJoinRefIterator::DebugString () const override
 Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator. More...
 
 RefIterator< Reverse >::RefIterator (THD *thd, TABLE *table, TABLE_REF *ref, bool use_order, QEP_TAB *qep_tab, ha_rows *examined_rows)
 
bool RefIterator< Reverse >::Init () override
 Initialize or reinitialize the iterator. More...
 
std::vector< std::string > RefIterator< Reverse >::DebugString () const override
 Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator. More...
 
 DynamicRangeIterator::DynamicRangeIterator (THD *thd, TABLE *table, QEP_TAB *qep_tab, ha_rows *examined_rows)
 
bool DynamicRangeIterator::Init () override
 Initialize or reinitialize the iterator. More...
 
int DynamicRangeIterator::Read () override
 Read a single row. More...
 
std::vector< std::string > DynamicRangeIterator::DebugString () const override
 Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator. More...
 
bool QEP_TAB::use_order () const
 Use ordering provided by chosen index? More...
 
 FullTextSearchIterator::FullTextSearchIterator (THD *thd, TABLE *table, TABLE_REF *ref, bool use_order, ha_rows *examined_rows)
 
 FullTextSearchIterator::~FullTextSearchIterator () override
 
bool FullTextSearchIterator::Init () override
 Initialize or reinitialize the iterator. More...
 
int FullTextSearchIterator::Read () override
 Read a single row. More...
 
std::vector< std::string > FullTextSearchIterator::DebugString () const override
 Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator. More...
 
 RefOrNullIterator::RefOrNullIterator (THD *thd, TABLE *table, TABLE_REF *ref, bool use_order, QEP_TAB *qep_tab, ha_rows *examined_rows)
 Reading of key with key reference and one part that may be NULL. More...
 
bool RefOrNullIterator::Init () override
 Initialize or reinitialize the iterator. More...
 
int RefOrNullIterator::Read () override
 Read a single row. More...
 
std::vector< std::string > RefOrNullIterator::DebugString () const override
 Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator. More...
 
 AlternativeIterator::AlternativeIterator (THD *thd, TABLE *table, QEP_TAB *qep_tab, ha_rows *examined_rows, unique_ptr_destroy_only< RowIterator > source, TABLE_REF *ref)
 
bool AlternativeIterator::Init () override
 Initialize or reinitialize the iterator. More...
 
std::vector< std::string > AlternativeIterator::DebugString () const override
 Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator. More...
 
void QEP_TAB::pick_table_access_method ()
 Pick the appropriate access method functions. More...
 
void Window::save_special_record (uint64 special_rowno, TABLE *t)
 Save row special_rowno in table t->record[0] to an in-memory copy for later restoration. More...
 
void Window::restore_special_record (uint64 special_rowno, uchar *record)
 Restore row special_rowno into record from in-memory copy. More...
 
bool QEP_TAB::remove_duplicates ()
 
bool JOIN::clear_fields (table_map *save_nullinfo)
 Clear all result fields. More...
 
void JOIN::restore_fields (table_map save_nullinfo)
 Restore all result fields for all tables specified in save_nullinfo. More...
 
bool QEP_tmp_table::prepare_tmp_table ()
 Instantiate tmp table and start index scan if necessary. More...
 
enum_nested_loop_state QEP_tmp_table::put_record (bool end_of_records)
 Prepare table if necessary and call write_func to save record. More...
 
enum_nested_loop_state QEP_tmp_table::end_send ()
 Finish rnd/index scan after accumulating records, switch ref_array, and send accumulated records further. More...
 
bool QEP_TAB::pfs_batch_update (JOIN *join) const
 

Variables

static constexpr size_t MAX_RECORD_BUFFER_SIZE = 128 * 1024
 Maximum amount of space (in bytes) to allocate for a Record_buffer. More...
 
void JOIN::create_table_iterators ()
 
unique_ptr_destroy_only< RowIteratorJOIN::create_root_iterator_for_join ()
 
unique_ptr_destroy_only< RowIteratorJOIN::attach_iterators_for_having_and_limit (unique_ptr_destroy_only< RowIterator > iterator)
 

Detailed Description

Enumeration Type Documentation

◆ CallingContext

Enumerator
TOP_LEVEL 
DIRECTLY_UNDER_SEMIJOIN 
DIRECTLY_UNDER_OUTER_JOIN 
DIRECTLY_UNDER_WEEDOUT 

◆ Substructure

enum Substructure
strong
Enumerator
NONE 
OUTER_JOIN 
SEMIJOIN 
WEEDOUT 

Function Documentation

◆ AlternativeIterator()

AlternativeIterator::AlternativeIterator ( THD thd,
TABLE table,
QEP_TAB qep_tab,
ha_rows examined_rows,
unique_ptr_destroy_only< RowIterator source,
TABLE_REF ref 
)

◆ ConstIterator()

ConstIterator::ConstIterator ( THD thd,
TABLE table,
TABLE_REF table_ref,
ha_rows examined_rows 
)

◆ DynamicRangeIterator()

DynamicRangeIterator::DynamicRangeIterator ( THD thd,
TABLE table,
QEP_TAB qep_tab,
ha_rows examined_rows 
)

◆ EQRefIterator()

EQRefIterator::EQRefIterator ( THD thd,
TABLE table,
TABLE_REF ref,
bool  use_order,
ha_rows examined_rows 
)

◆ FullTextSearchIterator()

FullTextSearchIterator::FullTextSearchIterator ( THD thd,
TABLE table,
TABLE_REF ref,
bool  use_order,
ha_rows examined_rows 
)

◆ PushedJoinRefIterator()

PushedJoinRefIterator::PushedJoinRefIterator ( THD thd,
TABLE table,
TABLE_REF ref,
bool  use_order,
ha_rows examined_rows 
)

◆ RefIterator()

template<bool Reverse>
RefIterator< Reverse >::RefIterator ( THD thd,
TABLE table,
TABLE_REF ref,
bool  use_order,
QEP_TAB qep_tab,
ha_rows examined_rows 
)

◆ RefOrNullIterator()

RefOrNullIterator::RefOrNullIterator ( THD thd,
TABLE table,
TABLE_REF ref,
bool  use_order,
QEP_TAB qep_tab,
ha_rows examined_rows 
)

Reading of key with key reference and one part that may be NULL.

◆ ~FullTextSearchIterator()

FullTextSearchIterator::~FullTextSearchIterator ( )
override

◆ alloc_group_fields()

static bool alloc_group_fields ( JOIN join,
ORDER group 
)
static

Get a list of buffers for saving the last group.

Groups are saved in reverse order for an easier check loop.

◆ attach_iterators_for_having_and_limit()

unique_ptr_destroy_only< RowIterator > JOIN::attach_iterators_for_having_and_limit ( unique_ptr_destroy_only< RowIterator iterator)
private

◆ bring_back_frame_row()

bool bring_back_frame_row ( THD thd,
Window w,
Temp_table_param out_param,
int64  rowno,
Window_retrieve_cached_row_reason  reason,
int  fno 
)

Bring back buffered data to the record of qep_tab-1 [1], and optionally execute copy_fields() to the OUT table.

[1] This is not always the case. For the first window, if we have no PARTITION BY or ORDER BY in the window, and there is more than one table in the join, the logical input can consist of more than one table (qep_tab-1 .. qep_tab-n), so the record is composed accordingly.

This method works by temporarily reversing the "normal" direction of the field copying.

Also make a note of the position of the record we retrieved in the window's m_frame_buffer_positions to be able to optimize succeeding retrievals.

Parameters
thdThe current thread
wThe current window
out_paramOUT table; if not nullptr, does copy_fields() to OUT
rownoThe row number (in the partition) to set up
reasonWhat kind of row to retrieve
fnoUsed with NTH_VALUE and LEAD/LAG to specify which window function's position cache to use, i.e. what index of m_frame_buffer_positions to update. For the second LEAD/LAG window function in a query, the index would be REA_MISC_POSITIONS (reason) + <no of NTH functions> + 2.
Returns
true on error

◆ buffer_record_somewhere()

static bool buffer_record_somewhere ( THD thd,
Window w,
int64  rowno 
)
static

Save a window frame buffer to frame buffer temporary table.

Parameters
thdThe current thread
wThe current window
rownoThe rowno in the current partition (1-based)

◆ buffer_windowing_record()

bool buffer_windowing_record ( THD thd,
Temp_table_param param,
bool new_partition 
)

If we cannot evaluate all window functions for a window on the fly, buffer the current row for later processing by process_buffered_windowing_record.

Parameters
thdCurrent thread
paramThe temporary table parameter
[in,out]new_partitionIf input is not nullptr: sets the bool pointed to to true if a new partition was found and there was a previous partition; if so the buffering of the first row in new partition isn't done and must be repeated later: we save away the row as rowno FBC_FIRST_IN_NEXT_PARTITION, then fetch it back later, cf. end_write_wf. If input is nullptr, this is the "later" call to buffer the first row of the new partition: buffer the row.
Returns
true if error.

◆ change_refs_to_tmp_fields()

bool change_refs_to_tmp_fields ( List< Item > &  all_fields,
size_t  num_select_elements,
THD thd,
Ref_item_array  ref_item_array,
List< Item > *  res_selected_fields,
List< Item > *  res_all_fields 
)

Change all sum_func refs to fields to point at fields in tmp table.

Change all funcs to be fields in tmp table.

Parameters
all_fieldsall fields list; should really be const, but Item does not always respect constness
num_select_elementsnumber of elements in select item list
thdTHD pointer
[out]ref_item_arrayarray of pointers to top elements of field list
[out]res_selected_fieldsnew list of items of select item list
[out]res_all_fieldsnew list of all items
Returns
false if success, true if error

◆ change_to_use_tmp_fields()

bool change_to_use_tmp_fields ( List< Item > &  all_fields,
size_t  num_select_elements,
THD thd,
Ref_item_array  ref_item_array,
List< Item > *  res_selected_fields,
List< Item > *  res_all_fields 
)

Change all funcs and sum_funcs to fields in tmp table, and create new list of all items.

Parameters
all_fieldsall fields list; should really be const, but Item does not always respect constness
num_select_elementsnumber of elements in select item list
thdTHD pointer
[out]ref_item_arrayarray of pointers to top elements of field list
[out]res_selected_fieldsnew list of items of select item list
[out]res_all_fieldsnew list of all items
Returns
false if success, true if error

◆ check_unique_constraint()

bool check_unique_constraint ( TABLE table)

Check unique_constraint.

Calculates record's hash and checks whether the record given in table->record[0] is already present in the tmp table.

Parameters
tableJOIN_TAB of tmp table to check
Note
This function assumes record[0] is already filled by the caller. Depending on the presence of table->group, either the group fields or the full list of the table's fields are used to calculate the hash.
Returns
false if the same record was found; true if the record wasn't found

◆ clear_fields()

bool JOIN::clear_fields ( table_map save_nullinfo)

Clear all result fields.

Non-aggregated fields are set to NULL, aggregated fields are set to their special "clear" value.

Result fields can be fields from input tables, field values generated by sum functions and literal values.

This is used when no rows are found during grouping: for FROM clause, a result row of all NULL values will be output; then SELECT list expressions get evaluated. E.g. SUM() will be NULL (the special "clear" value) and thus SUM() IS NULL will be true.

Note
Setting field values for input tables is a destructive operation, since it overwrites the NULL value flags with 1 bits. Rows from const tables are never re-read, hence their NULL value flags must be saved by this function and later restored by JOIN::restore_fields(). This is generally not necessary for non-const tables, since field values are overwritten when new rows are read.
Parameters
[out]save_nullinfoMap of tables whose fields were set to NULL, and for which NULL values must be restored. Should be set to all zeroes on entry to function.
Returns
false if success, true if error

◆ cmp_field_value()

static bool cmp_field_value ( Field field,
ptrdiff_t  diff 
)
static

◆ compare_record()

static bool compare_record ( TABLE table,
Field **  ptr 
)
static

◆ compute_field_lengths()

static size_t compute_field_lengths ( Field **  first_field,
size_t *  field_lengths 
)
static

For a set of fields, compute how many bytes their respective sort keys need.

Parameters
first_fieldArray of fields, terminated by nullptr.
[out]field_lengthsThe computed sort buffer length for each field. Must be allocated by the caller.
Return values
The total number of bytes needed, sans extra alignment.
Note
This assumes that Field::sort_length() is constant for each field.

◆ ConnectJoins()

static unique_ptr_destroy_only<RowIterator> ConnectJoins ( plan_idx  first_idx,
plan_idx  last_idx,
QEP_TAB qep_tabs,
THD thd,
CallingContext  calling_context,
vector< PendingCondition > *  pending_conditions,
vector< PendingInvalidator > *  pending_invalidators,
qep_tab_map unhandled_duplicates 
)
static

For a given slice of the table list, build up the iterator tree corresponding to the tables in that slice.

It handles inner and outer joins, as well as semijoins (“first match”).

The join tree in MySQL is generally a left-deep tree of inner joins, so we can start at the left, make an inner join against the next table, join the result of that against the next table, etc.. However, a given sub-slice of the table list can be designated as an outer join, by setting first_inner() and last_inner() on the first table of said slice. (It is also set in some, but not all, of the other tables in the slice.) If so, we call ourselves recursively with that slice, put it as the right (inner) arm of an outer join, and then continue with our inner join.

Similarly, if a table N has set “first match” to table M (ie., jump back to table M whenever we see a non-filtered record in table N), then there is a subslice from [M+1,N] that we need to process recursively before putting it as the right side of a semijoin. Every semijoin can be implemented with a LIMIT 1, but for clarity and performance, we prefer to use a NestedLoopJoin with a special SEMI join type whenever possible. Sometimes, we have no choice, though (see the comments below). Note that we cannot use first_sj_inner() for detecting semijoins, as it is not updated when tables are reordered by the join optimizer. Outer joins and semijoins can nest, so we need to take some care to make sure that we pick the outermost structure to recurse on.

Conditions are a bit tricky. Conceptually, SQL evaluates conditions only after all tables have been joined; however, for efficiency reasons, we want to evaluate them as early as possible. As long as we are only dealing with inner joins, this is as soon as we've read all tables participating in the condition, but for outer joins, we need to wait until the join has happened. See pending_conditions below.

Parameters
first_idxindex of the first table in the slice we are creating a tree for (inclusive)
last_idxindex of the last table in the slice we are creating a tree for (exclusive)
qep_tabsthe full list of tables we are joining
thdthe THD to allocate the iterators on
calling_contextwhat situation we have immediately around us in the tree (ie., whether we are called to resolve the inner part of an outer join, a semijoin, etc.); mostly used to avoid infinite recursion where we would process e.g. the same semijoin over and over again
pending_conditionsif nullptr, we are not at the right (inner) side of any outer join and can evaluate conditions immediately. If not, we need to push any WHERE predicates to that vector and evaluate them only after joins.
pending_invalidatorssimilar to pending_conditions, but for tables that should have a CacheInvalidatorIterator synthesized for them; NULL-complemented rows must also invalidate materialized lateral derived tables.
[out]unhandled_duplicateslist of tables we should have deduplicated using duplicate weedout, but could not; append-only.

◆ construct_lookup_ref()

bool construct_lookup_ref ( THD thd,
TABLE table,
TABLE_REF ref 
)

Copy the lookup key into the table ref's key buffer.

Parameters
thdpointer to the THD object
tablethe table to read
refinformation about the index lookup key
Return values
falseref key copied successfully
trueerror detected during copying of key

◆ ConvertItemsToCopy()

void ConvertItemsToCopy ( List< Item > *  items,
Field **  fields,
Temp_table_param param,
JOIN join 
)

For historical reasons, derived table materialization and temporary table materialization didn't specify the fields to materialize in the same way.

Temporary table materialization used copy_fields() and copy_funcs() (also reused for aggregation; see the comments on AggregateIterator for the relation between aggregations and temporary tables) to get the data into the Field pointers of the temporary table to be written, storing the lists in copy_fields and items_to_copy.

However, derived table materialization used JOIN::fields (which is a set of Item, not Field!) for the same purpose, calling fill_record() (which originally was meant for INSERT and UPDATE) instead. Thus, we have to rewrite one to the other, so that we can have only one MaterializeIterator. We choose to rewrite JOIN::fields to copy_fields/items_to_copy.

TODO: The optimizer should output just one kind of structure directly.

◆ copy_blobs()

static bool copy_blobs ( Field **  ptr)
static

◆ copy_fields()

bool copy_fields ( Temp_table_param param,
const THD thd,
bool  reverse_copy 
)

Make a copy of all simple SELECT'ed fields.

This is done at the start of a new group so that we can retrieve these later when the group changes. It is also used in materialization, to copy the values into the temporary table's fields.

Parameters
paramRepresents the current temporary file being produced
thdThe current thread
reverse_copyIf true, copies fields back from the frame buffer tmp table to the input table's buffer, cf. bring_back_frame_row.
Returns
false if OK, true on error.

◆ copy_fields_and_funcs()

bool copy_fields_and_funcs ( Temp_table_param param,
const THD thd,
Copy_func_type  type 
)

◆ copy_funcs()

bool copy_funcs ( Temp_table_param param,
const THD thd,
Copy_func_type  type 
)

Copy result of functions to record in tmp_table.

Uses the thread pointer to check for errors in some of the val_xxx() methods called by the save_in_result_field() function. TODO: make the Item::val_xxx() return error code

Parameters
paramCopy functions of tmp table specified by param
thdpointer to the current thread for error checking
typetype of function Items that need to be copied (used w.r.t windowing functions).
Return values
falseif OK
trueon error

◆ copy_sum_funcs()

void copy_sum_funcs ( Item_sum **  func_ptr,
Item_sum **  end_ptr 
)

Copy result of sum functions to record in tmp_table.

◆ create_intermediate_table()

bool JOIN::create_intermediate_table ( QEP_TAB tab,
List< Item > *  tmp_table_fields,
ORDER_with_src tmp_table_group,
bool  save_sum_fields 
)
private

Create a temporary table to be used for processing DISTINCT/ORDER BY/GROUP BY.

Note
Will modify JOIN object wrt sort/group attributes
Parameters
tabthe JOIN_TAB object to attach created table to
tmp_table_fieldsList of items that will be used to define column types of the table.
tmp_table_groupGroup key to use for temporary table, NULL if none.
save_sum_fieldsIf true, do not replace Item_sum items in tmp_fields list with Item_field items referring to fields in temporary table.
Returns
false on success, true on failure

If this is a window's OUT table, any final DISTINCT, ORDER BY will lead to windows showing use of tmp table in the final windowing step, so no need to signal use of tmp table unless we are here for another tmp table.

◆ create_iterators()

void JOIN::create_iterators ( )
private

If possible, convert the executor structures to a set of row iterators, storing the result in m_root_iterator.

If not, m_root_iterator will remain nullptr.

◆ create_root_iterator_for_join()

unique_ptr_destroy_only< RowIterator > JOIN::create_root_iterator_for_join ( )
private

◆ create_table_iterators()

void JOIN::create_table_iterators ( )
private

Helpers for create_iterators.

◆ CreateInvalidatorIterator()

static unique_ptr_destroy_only<RowIterator> CreateInvalidatorIterator ( THD thd,
QEP_TAB qep_tab,
unique_ptr_destroy_only< RowIterator iterator 
)
static

◆ CreateNestedLoopIterator()

unique_ptr_destroy_only<RowIterator> CreateNestedLoopIterator ( THD thd,
unique_ptr_destroy_only< RowIterator left_iterator,
unique_ptr_destroy_only< RowIterator right_iterator,
JoinType  join_type,
bool  pfs_batch_mode 
)

◆ CreateWeedoutIterator()

static unique_ptr_destroy_only<RowIterator> CreateWeedoutIterator ( THD thd,
unique_ptr_destroy_only< RowIterator iterator,
SJ_TMP_TABLE weedout_table 
)
static

◆ CreateWeedoutIteratorForTables()

static unique_ptr_destroy_only<RowIterator> CreateWeedoutIteratorForTables ( THD thd,
const qep_tab_map  tables_to_deduplicate,
QEP_TAB qep_tabs,
uint  primary_tables,
unique_ptr_destroy_only< RowIterator iterator 
)
static

◆ dbug_allow_write_all_columns()

static void dbug_allow_write_all_columns ( Temp_table_param param,
std::map< TABLE *, my_bitmap_map * > &  map 
)
inlinestatic

◆ dbug_restore_all_columns()

static void dbug_restore_all_columns ( std::map< TABLE *, my_bitmap_map * > &  map)
inlinestatic

◆ DebugString() [1/8]

template<bool Reverse>
vector< string > RefIterator< Reverse >::DebugString ( ) const
overridevirtual

Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator.

When implementing these, try to avoid internal jargon (e.g. “eq_ref”); prefer things that read like normal, technical English (e.g. “single-row index lookup”).

For certain complex operations, such as MaterializeIterator, there can be multiple strings. If so, they are interpreted as nested operations, with the outermost, last-done operation first and the other ones indented as if they were child iterators.

Callers should use FullDebugString() below, which adds costs (see set_estimated_cost() etc.) if present.

Implements RowIterator.

◆ DebugString() [2/8]

vector< string > RefOrNullIterator::DebugString ( ) const
overridevirtual

Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator.

When implementing these, try to avoid internal jargon (e.g. “eq_ref”); prefer things that read like normal, technical English (e.g. “single-row index lookup”).

For certain complex operations, such as MaterializeIterator, there can be multiple strings. If so, they are interpreted as nested operations, with the outermost, last-done operation first and the other ones indented as if they were child iterators.

Callers should use FullDebugString() below, which adds costs (see set_estimated_cost() etc.) if present.

Implements RowIterator.

◆ DebugString() [3/8]

vector< string > EQRefIterator::DebugString ( ) const
overridevirtual

Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator.

When implementing these, try to avoid internal jargon (e.g. “eq_ref”); prefer things that read like normal, technical English (e.g. “single-row index lookup”).

For certain complex operations, such as MaterializeIterator, there can be multiple strings. If so, they are interpreted as nested operations, with the outermost, last-done operation first and the other ones indented as if they were child iterators.

Callers should use FullDebugString() below, which adds costs (see set_estimated_cost() etc.) if present.

Implements RowIterator.

◆ DebugString() [4/8]

vector< string > ConstIterator::DebugString ( ) const
overridevirtual

Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator.

When implementing these, try to avoid internal jargon (e.g. “eq_ref”); prefer things that read like normal, technical English (e.g. “single-row index lookup”).

For certain complex operations, such as MaterializeIterator, there can be multiple strings. If so, they are interpreted as nested operations, with the outermost, last-done operation first and the other ones indented as if they were child iterators.

Callers should use FullDebugString() below, which adds costs (see set_estimated_cost() etc.) if present.

Implements RowIterator.

◆ DebugString() [5/8]

vector< string > FullTextSearchIterator::DebugString ( ) const
overridevirtual

Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator.

When implementing these, try to avoid internal jargon (e.g. “eq_ref”); prefer things that read like normal, technical English (e.g. “single-row index lookup”).

For certain complex operations, such as MaterializeIterator, there can be multiple strings. If so, they are interpreted as nested operations, with the outermost, last-done operation first and the other ones indented as if they were child iterators.

Callers should use FullDebugString() below, which adds costs (see set_estimated_cost() etc.) if present.

Implements RowIterator.

◆ DebugString() [6/8]

vector< string > DynamicRangeIterator::DebugString ( ) const
overridevirtual

Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator.

When implementing these, try to avoid internal jargon (e.g. “eq_ref”); prefer things that read like normal, technical English (e.g. “single-row index lookup”).

For certain complex operations, such as MaterializeIterator, there can be multiple strings. If so, they are interpreted as nested operations, with the outermost, last-done operation first and the other ones indented as if they were child iterators.

Callers should use FullDebugString() below, which adds costs (see set_estimated_cost() etc.) if present.

Implements RowIterator.

◆ DebugString() [7/8]

vector< string > PushedJoinRefIterator::DebugString ( ) const
overridevirtual

Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator.

When implementing these, try to avoid internal jargon (e.g. “eq_ref”); prefer things that read like normal, technical English (e.g. “single-row index lookup”).

For certain complex operations, such as MaterializeIterator, there can be multiple strings. If so, they are interpreted as nested operations, with the outermost, last-done operation first and the other ones indented as if they were child iterators.

Callers should use FullDebugString() below, which adds costs (see set_estimated_cost() etc.) if present.

Implements RowIterator.

◆ DebugString() [8/8]

vector< string > AlternativeIterator::DebugString ( ) const
overridevirtual

Returns a short string (used for EXPLAIN FORMAT=tree) with user-readable information for this iterator.

When implementing these, try to avoid internal jargon (e.g. “eq_ref”); prefer things that read like normal, technical English (e.g. “single-row index lookup”).

For certain complex operations, such as MaterializeIterator, there can be multiple strings. If so, they are interpreted as nested operations, with the outermost, last-done operation first and the other ones indented as if they were child iterators.

Callers should use FullDebugString() below, which adds costs (see set_estimated_cost() etc.) if present.

Implements RowIterator.

◆ do_select()

static int do_select ( JOIN join)
static

Make a join of all tables and write it on socket or to table.

Return values
0if ok
1if error is sent
-1if error should be sent

◆ do_sj_dups_weedout()

int do_sj_dups_weedout ( THD thd,
SJ_TMP_TABLE sjtbl 
)

SemiJoinDuplicateElimination: Weed out duplicate row combinations.

SYNOPSIS do_sj_dups_weedout() thd Thread handle sjtbl Duplicate weedout table

DESCRIPTION Try storing current record combination of outer tables (i.e. their rowids) in the temporary table. This records the fact that we've seen this record combination and also tells us if we've seen it before.

RETURN -1 Error 1 The row combination is a duplicate (discard it) 0 The row combination is not a duplicate (continue)

◆ do_sj_reset()

static int do_sj_reset ( SJ_TMP_TABLE sj_tbl)
static

SemiJoinDuplicateElimination: Reset the temporary table.

◆ end_send() [1/2]

enum_nested_loop_state QEP_tmp_table::end_send ( )
virtual

Finish rnd/index scan after accumulating records, switch ref_array, and send accumulated records further.

Returns
return one of enum_nested_loop_state.

Window final tmp file optimization: rows have already been sent from end_write, so just return.

Implements QEP_operation.

◆ end_send() [2/2]

static enum_nested_loop_state end_send ( JOIN join,
QEP_TAB qep_tab,
bool  end_of_records 
)
static

◆ end_send_count()

enum_nested_loop_state end_send_count ( JOIN join,
QEP_TAB qep_tab 
)

◆ end_send_group()

enum_nested_loop_state end_send_group ( JOIN join,
QEP_TAB qep_tab,
bool  end_of_records 
)

◆ end_sj_materialize()

static enum_nested_loop_state end_sj_materialize ( JOIN join,
QEP_TAB qep_tab,
bool  end_of_records 
)
static

◆ end_update()

static enum_nested_loop_state end_update ( JOIN join,
QEP_TAB qep_tab,
bool  end_of_records 
)
static

Group by searching after group record and updating it if possible.

◆ end_write()

static enum_nested_loop_state end_write ( JOIN join,
QEP_TAB qep_tab,
bool  end_of_records 
)
static

◆ end_write_group()

enum_nested_loop_state end_write_group ( JOIN join,
QEP_TAB *const  qep_tab,
bool  end_of_records 
)

◆ end_write_wf()

static enum_nested_loop_state end_write_wf ( JOIN join,
QEP_TAB qep_tab,
bool  end_of_records 
)
static

Similar to end_write, but used in the windowing tmp table steps.

If we don't need to buffer rows to evaluate the window functions, execution is simple, see logic below. In that case we can just evaluate the window functions as we go here, similar to the non windowing flow, cf. copy_funcs below and in end_write.

If we do need buffering, though, we buffer the row here. Next, we enter a loop calling process_buffered_windowing_record and conditionally write (or send) the row onward. That is, if process_buffered_windowing_record was able to complete evaluation of a row (cf. output_row_ready), including its window functions given how much has already been buffered, we do the write (or send), else we exit, and postpone evaluation and writing till we have enough rows in the buffer.

When we have read a full partition (or reach EOF), we evaluate any remaining rows. Note that since we have to read one row past the current partition to detect that that previous row was indeed the last row in a partition, we need to re-establish the first row of the next partition when we are done processing the current one. This is because the record will be overwritten (many times) during evaluation of window functions in the current partition.

Usually [1], for window execution we have two or three tmp tables per windowing step involved:

  • The input table, corresponding to qep_tab-1. Holds (possibly sorted) records ready for windowing, sorted on expressions concatenated from any PARTITION BY and ORDER BY clauses.
  • The output table, corresponding to qep_tab: where we write the evaluated records from this step. Note that we may optimize away this last write if we have no final ORDER BY or DISTINCT, see write_or_send_row.
  • If we have buffering, the frame buffer, held by Window::m_frame_buffer[_param]

[1] This is not always the case. For the first window, if we have no PARTITION BY or ORDER BY in the window, and there is more than one table in the join, the logical input can consist of more than one table (qep_tab-1 .. qep_tab-n).

The first thing we do in this function, is: we copy fields from IN to OUT (copy_fields), and evaluate non-WF functions (copy_funcs): those functions then read their arguments from IN and store their result into their result_field which is a field in OUT. We then evaluate any HAVING, on OUT table. The next steps depend on if we have a FB (Frame Buffer) or not.

(a) If we have no FB, we immediately calculate the WFs over the OUT row, store their value in OUT row, and pass control to next plan operator (write_or_send_row) - we're done.

(b) If we have a FB, let's take SUM(A+FLOOR(B)) OVER (ROWS 2 FOLLOWING) as example. Above, we have stored A and the result of FLOOR in OUT. Now we buffer (save) the row into the FB: for that, we copy field A from IN to FB, and FLOOR's result_field from OUT to FB; a single copy_fields() call handles both copy jobs. Then we look at the rows we have buffered and may realize that we have enough of the frame to calculate SUM for a certain row (not necessarily the one we just buffered; might be an earlier row, in our example it is the row which is 2 rows above the buffered row). If we do, to calculate WFs, we bring back the frame's rows; which is done by: first copying field A and FLOOR's result_field in directions opposite to above (using one copy_fields), then copying field A from IN to OUT, thus getting in OUT all that SUM needs (A and FLOOR), then giving that OUT row to SUM (SUM will then add the row's value to its total; that happens in copy_funcs). After we have done that on all rows of the frame, we have the values of SUM ready in OUT, we also restore the row which owns this SUM value, in the same way as we restored the frame's rows, and we pass control to next plan operator (write_or_send_row) - we're done for this row. However, when the next plan operator is done and we regain control, we loop to check if we can calculate one more row with the frame we have, and if so, we do. Until we can't calculate any more row in which case we're back to just buffering.

◆ evaluate_join_record()

static enum_nested_loop_state evaluate_join_record ( JOIN join,
QEP_TAB *const  qep_tab 
)
static

Process one row of the nested loop join.

This function will evaluate parts of WHERE/ON clauses that are applicable to the partial row on hand and in case of success submit this row to the next level of the nested loop. join_tab->return_tab may be modified to cause a return to a previous join_tab.

Parameters
joinThe join object
qep_tabThe most inner qep_tab being processed
Returns
Nested loop state

◆ evaluate_null_complemented_join_record()

static enum_nested_loop_state evaluate_null_complemented_join_record ( JOIN join,
QEP_TAB qep_tab 
)
static

Construct a NULL-complemented partial join record and feed it to the next level of the nested loop. This function is used in case we have an OUTER join and no matching record was found.

◆ exec()

void JOIN::exec ( )

Execute select, executor entry point.

Note
that EXPLAIN may come here (single-row derived table, uncorrelated scalar subquery in WHERE clause...).

◆ ExecuteIteratorQuery()

static int ExecuteIteratorQuery ( JOIN join)
static

◆ ExtractConditions()

static void ExtractConditions ( Item condition,
vector< Item * > *  condition_parts 
)
static

Split AND conditions into their constituent parts, recursively.

Conditions that are not AND conditions are appended unchanged onto condition_parts. E.g. if you have ((a AND b) AND c), condition_parts will contain [a, b, c], plus whatever it contained before the call.

◆ ExtractHashJoinConditions()

static void ExtractHashJoinConditions ( const QEP_TAB current_table,
qep_tab_map  left_tables,
vector< Item * > *  predicates,
vector< Item_func_eq * > *  hash_join_conditions,
vector< Item * > *  conditions_after_hash_join 
)
static

◆ FindSubstructure()

static Substructure FindSubstructure ( QEP_TAB qep_tabs,
const plan_idx  first_idx,
const plan_idx  this_idx,
const plan_idx  last_idx,
CallingContext  calling_context,
bool add_limit_1,
plan_idx substructure_end,
qep_tab_map unhandled_duplicates 
)
static

Given a range of tables (where we assume that we've already handled first_idx..(this_idx-1) as inner joins), figure out whether this is a semijoin, an outer join or a weedout.

In general, the outermost structure wins; if we are in one of the rare cases where there are e.g. coincident (first match) semijoins and weedouts, we do various forms of conflict resolution:

  • Unhandled weedouts will add elements to unhandled_duplicates (to be handled at the top level of the query).
  • Unhandled semijoins will either: Set add_limit_1 to true, which means a LIMIT 1 iterator should be added, or Add elements to unhandled_duplicates in situations that cannot be solved by a simple one-table, one-row LIMIT.

If not returning NONE, substructure_end will also be filled with where this sub-join ends (exclusive).

◆ free_blobs()

static void free_blobs ( Field **  ptr)
static

◆ get_end_select_func()

Next_select_func JOIN::get_end_select_func ( )

Rows produced by a join sweep may end up in a temporary table or be sent to a client. Setup the function of the nested loop join algorithm which handles final fully constructed and matched records.

Returns
end_select function to use. This function can't fail.

◆ get_exact_record_count()

ulonglong get_exact_record_count ( QEP_TAB qep_tab,
uint  table_count,
int *  error 
)

Get exact count of rows in all tables.

When this is called, at least one table's SE doesn't include HA_COUNT_ROWS_INSTANT.

Parameters
qep_tabList of qep_tab in this JOIN.
table_countCount of qep_tab in the JOIN.
error[out] Return any possible error. Else return 0
Returns
Cartesian product of count of the rows in all tables if success 0 if error.
Note
The "error" parameter is required for the sake of testcases like the one in innodb-wl6742.test:272. Earlier if an error was raised by ha_records, it wasn't handled by get_exact_record_count. Instead it was just allowed to go to the execution phase, where end_send_group would see the same error and raise it.

But with the new function 'end_send_count' in the execution phase, such an error should be properly returned so that it can be raised.

◆ GetTableIterator()

unique_ptr_destroy_only<RowIterator> GetTableIterator ( THD thd,
QEP_TAB qep_tab,
QEP_TAB qep_tabs 
)

Get the RowIterator used for scanning the given table, with any required materialization operations done first.

◆ GetTriggerCondOrNull()

static Item_func_trig_cond* GetTriggerCondOrNull ( Item item)
static

◆ group_rec_cmp()

static bool group_rec_cmp ( ORDER group,
uchar rec0,
uchar rec1 
)
static

Compare GROUP BY values from tmp table's record[0] and record[1].

Returns
true records are different false records are the same

◆ has_rollup_result()

bool has_rollup_result ( Item item)

Checks if an item has a ROLLUP NULL which needs to be written to temp table.

Parameters
itemItem for which we need to detect if ROLLUP NULL has to be written.
Returns
false if ROLLUP NULL need not be written for this item. true if it has to be written.

◆ having_is_true()

static bool having_is_true ( Item h)
static

Evaluates HAVING condition.

Returns
true if TRUE, false if FALSE or NULL
Note
this uses val_int() and relies on the convention that val_int() returns 0 when the value is NULL.

◆ Init() [1/8]

template<bool Reverse>
bool RefIterator< Reverse >::Init ( )
overridevirtual

Initialize or reinitialize the iterator.

You must always call Init() before trying a Read() (but Init() does not imply Read()).

You can call Init() multiple times; subsequent calls will rewind the iterator (or reposition it, depending on whether the iterator takes in e.g. a TABLE_REF) and allow you to read the records anew.

Implements RowIterator.

◆ Init() [2/8]

bool RefOrNullIterator::Init ( )
overridevirtual

Initialize or reinitialize the iterator.

You must always call Init() before trying a Read() (but Init() does not imply Read()).

You can call Init() multiple times; subsequent calls will rewind the iterator (or reposition it, depending on whether the iterator takes in e.g. a TABLE_REF) and allow you to read the records anew.

Implements RowIterator.

◆ Init() [3/8]

bool EQRefIterator::Init ( )
overridevirtual

Read row using unique key: eq_ref access method implementation.

This is the "read_first" function for the eq_ref access method. The difference from ref access function is that it has a one-element lookup cache, maintained in record[0]. Since the eq_ref access method will always return the same row, it is not necessary to read the row more than once, regardless of how many times it is needed in execution. This cache element is used when a row is needed after it has been read once, unless a key conversion error has occurred, or the cache has been disabled.

Return values
0- Ok
-1- Row not found
1- Error

Implements RowIterator.

◆ Init() [4/8]

bool ConstIterator::Init ( )
overridevirtual

Initialize or reinitialize the iterator.

You must always call Init() before trying a Read() (but Init() does not imply Read()).

You can call Init() multiple times; subsequent calls will rewind the iterator (or reposition it, depending on whether the iterator takes in e.g. a TABLE_REF) and allow you to read the records anew.

Implements RowIterator.

◆ Init() [5/8]

bool FullTextSearchIterator::Init ( )
overridevirtual

Initialize or reinitialize the iterator.

You must always call Init() before trying a Read() (but Init() does not imply Read()).

You can call Init() multiple times; subsequent calls will rewind the iterator (or reposition it, depending on whether the iterator takes in e.g. a TABLE_REF) and allow you to read the records anew.

Implements RowIterator.

◆ Init() [6/8]

bool DynamicRangeIterator::Init ( )
overridevirtual

Initialize or reinitialize the iterator.

You must always call Init() before trying a Read() (but Init() does not imply Read()).

You can call Init() multiple times; subsequent calls will rewind the iterator (or reposition it, depending on whether the iterator takes in e.g. a TABLE_REF) and allow you to read the records anew.

Implements RowIterator.

◆ Init() [7/8]

bool PushedJoinRefIterator::Init ( )
overridevirtual

Initialize or reinitialize the iterator.

You must always call Init() before trying a Read() (but Init() does not imply Read()).

You can call Init() multiple times; subsequent calls will rewind the iterator (or reposition it, depending on whether the iterator takes in e.g. a TABLE_REF) and allow you to read the records anew.

Implements RowIterator.

◆ Init() [8/8]

bool AlternativeIterator::Init ( )
overridevirtual

Initialize or reinitialize the iterator.

You must always call Init() before trying a Read() (but Init() does not imply Read()).

You can call Init() multiple times; subsequent calls will rewind the iterator (or reposition it, depending on whether the iterator takes in e.g. a TABLE_REF) and allow you to read the records anew.

Implements RowIterator.

◆ init_index_and_record_buffer()

static bool init_index_and_record_buffer ( const QEP_TAB qep_tab,
handler file,
uint  idx,
bool  sorted 
)
static

Initialize an index scan and the record buffer to use in the scan.

Parameters
qep_tabthe table to read
filethe handler to initialize
idxthe index to use
sorteduse the sorted order of the index
Return values
trueif an error occurred
falseon success

◆ init_sum_functions()

bool init_sum_functions ( Item_sum **  func_ptr,
Item_sum **  end_ptr 
)

◆ init_tmptable_sum_functions()

void init_tmptable_sum_functions ( Item_sum **  func_ptr)

◆ join_materialize_derived()

int join_materialize_derived ( QEP_TAB tab)

◆ join_materialize_semijoin()

int join_materialize_semijoin ( QEP_TAB tab)

◆ join_materialize_table_function()

int join_materialize_table_function ( QEP_TAB tab)

◆ join_read_const_table()

int join_read_const_table ( JOIN_TAB tab,
POSITION pos 
)

Reads content of constant table.

Parameters
tabtable
posposition of table in query plan
Return values
0ok, one row was found or one NULL-complemented row was created
-1ok, no row was found and no NULL-complemented row was created
1error

◆ join_setup_iterator()

void join_setup_iterator ( QEP_TAB tab)

Prepare table for reading rows and read first record.

Prior to reading the table following tasks are done, (in the order of execution): .) derived tables are materialized .) pre-iterator executor only: duplicates removed (tmp tables only) .) table is sorted with filesort (both non-tmp and tmp tables) After this have been done this function resets quick select, if it's present, sets up table reading functions, and reads first record.

Return values
0Ok
-1End of records
1Error

◆ make_group_fields()

bool make_group_fields ( JOIN main_join,
JOIN curr_join 
)

allocate group fields or take prepared (cached).

Parameters
main_joinjoin of current select
curr_joincurrent join (join of current select or temporary copy of it)
Return values
0ok
1failed

◆ MarkUnhandledDuplicates()

static void MarkUnhandledDuplicates ( SJ_TMP_TABLE weedout,
plan_idx  weedout_start,
plan_idx  weedout_end,
qep_tab_map unhandled_duplicates 
)
static

For a given duplicate weedout operation, figure out which tables are supposed to be deduplicated by it, and add those to unhandled_duplicates.

(SJ_TMP_TABLE contains the deduplication key, which is exactly the complement of the tables to be deduplicated.)

◆ optimize_distinct()

void JOIN::optimize_distinct ( )
private

Optimize distinct when used on a subset of the tables.

E.g.,: SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b In this case we can stop scanning t2 when we have found one t1.a

◆ pfs_batch_update()

bool QEP_TAB::pfs_batch_update ( JOIN join) const

◆ pick_table_access_method()

void QEP_TAB::pick_table_access_method ( )

Pick the appropriate access method functions.

Sets the functions for the selected table access method

◆ PossiblyAttachFilterIterator() [1/2]

unique_ptr_destroy_only<RowIterator> PossiblyAttachFilterIterator ( unique_ptr_destroy_only< RowIterator iterator,
const vector< Item * > &  conditions,
THD thd 
)

Return a new iterator that wraps "iterator" and that tests all of the given conditions (if any), ANDed together.

If there are no conditions, just return the given iterator back.

◆ PossiblyAttachFilterIterator() [2/2]

static unique_ptr_destroy_only<RowIterator> PossiblyAttachFilterIterator ( unique_ptr_destroy_only< RowIterator iterator,
const vector< PendingCondition > &  conditions,
THD thd 
)
static

◆ prepare_scan()

bool QEP_TAB::prepare_scan ( )

Prepare table to be scanned.

This function is the place to do any work on the table that needs to be done before table can be scanned. Currently it materializes derived tables and semi-joined subqueries, binds buffer for current rowid and removes duplicates if needed.

Returns
false - Ok, true - error

◆ prepare_sum_aggregators()

bool prepare_sum_aggregators ( Item_sum **  func_ptr,
bool  need_distinct 
)

◆ prepare_tmp_table()

bool QEP_tmp_table::prepare_tmp_table ( )
private

Instantiate tmp table and start index scan if necessary.

Extend executor to avoid tmp table creation when no rows were written into tmp table.

Returns
true error false ok

◆ process_buffered_windowing_record()

bool process_buffered_windowing_record ( THD thd,
Temp_table_param param,
const bool  new_partition_or_eof,
bool output_row_ready 
)

While there are more unprocessed rows ready to process given the current partition/frame state, process such buffered rows by evaluating/aggregating the window functions defined over this window on the current frame, moving the frame if required.

This method contains the main execution time logic of the evaluation window functions if we need buffering for one or more of the window functions defined on the window.

Moving (sliding) frames can be executed using a naive or optimized strategy for aggregate window functions, like SUM or AVG (but not MAX, or MIN). In the naive approach, for each row considered for processing from the buffer, we visit all the rows defined in the frame for that row, essentially leading to N*M complexity, where N is the number of rows in the result set, and M is the number for rows in the frame. This can be slow for large frames, obviously, so we can choose an optimized evaluation strategy using inversion. This means that when rows leave the frame as we move it forward, we re-use the previous aggregate state, but compute the inverse function to eliminate the contribution to the aggregate by the row(s) leaving the frame, and then use the normal aggregate function to add the contribution of the rows moving into the frame. The present method contains code paths for both strategies.

For integral data types, this is safe in the sense that the result will be the same if no overflow occurs during normal evaluation. For floating numbers, optimizing in this way may lead to different results, so it is not done by default, cf the session variable "windowing_use_high_precision".

Since the evaluation strategy is chosen based on the "most difficult" window function defined on the window, we must also be able to evaluate non-aggregates like ROW_NUMBER, NTILE, FIRST_VALUE in the code path of the optimized aggregates, so there is redundant code for those in the naive and optimized code paths. Note that NTILE forms a class of its own of the non-aggregates: it needs two passes over the partition's rows since the cardinality is needed to compute it. Furthermore, FIRST_VALUE and LAST_VALUE heed the frames, but they are not aggregates.

There is a special optimized code path for static aggregates: when the window frame is the default, i.e. the entire partition, and there is no ORDER BY specified, the value of the framing window functions, i.e. SUM, AVG, FIRST_VALUE, LAST_VALUE can be evaluated once and for all and saved when we visit and evaluate the first row of the partition. For later rows we restore the aggregate values and just fill in the other fields and evaluate non-framing window functions for the row.

The code paths both for naive execution and optimized execution differ depending on whether we have ROW or RANGE boundaries in an explicit frame.

A word on BLOBs. Below we make copies of rows into the frame buffer. This is a temporary table, so BLOBs get copied in the normal way.

Sometimes we save records containing already computed framing window functions away into memory only: is the lifetime of the referenced BLOBs long enough? We have two cases:

BLOB results from wfs: Any BLOB results will reside in the copies in result fields of the Items ready for the out file, so they no longer need any BLOB memory read from the frame buffer tmp file.

BLOB fields not evaluated by wfs: Any other BLOB field will be copied as well, and would not have life-time past the next read from the frame buffer, but they are never used since we fill in the fields from the current row after evaluation of the window functions, so we don't need to make special copies of such BLOBs. This can be (and was) tested by shredding any BLOBs deallocated by InnoDB at the next read.

We also save away in memory the next record of the next partition while processing the current partition. Any blob there will have its storage from the read of the input file, but we won't be touching that for reading again until after we start processing the next partition and save the saved away next partition row to the frame buffer.

Note that the logic of this function is centered around the window, not around the window function. It is about putting rows in a partition, in a frame, in a set of peers, and passing this information to all window functions attached to this window; each function looks at the partition, frame, or peer set in its own particular way (for example RANK looks at the partition, SUM looks at the frame).

Parameters
thdCurrent thread
paramCurrent temporary table
new_partition_or_eofTrue if (we are about to start a new partition and there was a previous partition) or eof
[out]output_row_readyTrue if there is a row record ready to write to the out table
Returns
true if error

The current window

The frame

This is the row we are currently considering for processing and getting ready for output, cf. output_row_ready.

This is the row number of the last row we have buffered so far.

If true, use code path for static aggregates

If true, use code path for ROW bounds with optimized strategy

If true, use code path for RANGE bounds with optimized strategy

We need to evaluate FIRST_VALUE, or optimized MIN/MAX

We need to evaluate LAST_VALUE, or optimized MIN/MAX

We need to evaluate NTH_VALUE

We need to evaluate LEAD/LAG rows

True if an inversion optimization strategy is used. For common code paths.

RANGE was specified as the bounds unit for the frame

UNBOUNDED FOLLOWING was specified for the frame

Row_number of the first row in the frame. Invariant: lower_limit >= 1 after initialization.

Row_number of the logically last row to be computed in the frame, may be higher than the number of rows in the partition. The actual highest row number is computed later, see upper below.

needs peerset of current row to evaluate a wf for the current row.

needs the last peer of the current row within a frame.

For optimized strategy we want to save away the previous aggregate result and reuse in later round by inversion. This keeps track of whether we managed to compute results for this current row (result are "primed"), so we can use inversion in later rows. Cf Window::m_aggregates_primed.

Possible adjustment of the logical upper_limit: no rows exist beyond last_rowno_in_cache.

< iterates over rows in a frame

< RANGE: # of visited rows seen before the frame

Whether we know the start of the frame yet. The a priori setting is inherited from the previous current row.

◆ process_wfs_needing_card()

static bool process_wfs_needing_card ( THD thd,
Temp_table_param param,
const Window::st_nth have_nth_value,
const Window::st_lead_lag have_lead_lag,
const int64  current_row,
Window w,
Window_retrieve_cached_row_reason  current_row_reason 
)
static

Process window functions that need partition cardinality.

◆ put_record()

enum_nested_loop_state QEP_tmp_table::put_record ( bool  end_of_records)
private

Prepare table if necessary and call write_func to save record.

Parameters
end_of_recordsThe end_of_record signal to pass to the writer
Returns
return one of enum_nested_loop_state.

◆ Read() [1/6]

int RefOrNullIterator::Read ( )
overridevirtual

Read a single row.

The row data is not actually returned from the function; it is put in the table's (or tables', in case of a join) record buffer, i.e., table->record[0].

Return values
0OK
-1End of records
1Error

Implements RowIterator.

◆ Read() [2/6]

int EQRefIterator::Read ( )
overridevirtual

Read row using unique key: eq_ref access method implementation.

The difference from RefIterator is that it has a one-element lookup cache, maintained in record[0]. Since the eq_ref access method will always return the same row, it is not necessary to read the row more than once, regardless of how many times it is needed in execution. This cache element is used when a row is needed after it has been read once, unless a key conversion error has occurred, or the cache has been disabled.

Return values
0- Ok
-1- Row not found
1- Error

Implements RowIterator.

◆ Read() [3/6]

int ConstIterator::Read ( )
overridevirtual

Read a constant table when there is at most one matching row, using an index lookup.

Return values
0Row was found
-1Row was not found
1Got an error (other than row not found) during read

Implements RowIterator.

◆ Read() [4/6]

int FullTextSearchIterator::Read ( )
overridevirtual

Read a single row.

The row data is not actually returned from the function; it is put in the table's (or tables', in case of a join) record buffer, i.e., table->record[0].

Return values
0OK
-1End of records
1Error

Implements RowIterator.

◆ Read() [5/6]

int DynamicRangeIterator::Read ( )
overridevirtual

Read a single row.

The row data is not actually returned from the function; it is put in the table's (or tables', in case of a join) record buffer, i.e., table->record[0].

Return values
0OK
-1End of records
1Error

Implements RowIterator.

◆ Read() [6/6]

int PushedJoinRefIterator::Read ( )
overridevirtual

Read a single row.

The row data is not actually returned from the function; it is put in the table's (or tables', in case of a join) record buffer, i.e., table->record[0].

Return values
0OK
-1End of records
1Error

Implements RowIterator.

◆ read_const()

static int read_const ( TABLE table,
TABLE_REF ref 
)
static

◆ read_frame_buffer_row()

static bool read_frame_buffer_row ( int64  rowno,
Window w,
bool  for_nth_value 
)
static

Read row rowno from frame buffer tmp file using cached row positions to minimize positioning work.

◆ read_system()

static int read_system ( TABLE table)
static

Read a constant table when there is at most one matching row, using a table scan.

Parameters
tableTable to read
Return values
0Row was found
-1Row was not found
1Got an error (other than row not found) during read

◆ record_prefix_size()

static size_t record_prefix_size ( const QEP_TAB qep_tab)
static

Find out how many bytes it takes to store the smallest prefix which covers all the columns that will be read from a table.

Parameters
qep_tabthe table to read
Returns
the size of the smallest prefix that covers all records to be read from the table

◆ refresh_lateral()

void QEP_TAB::refresh_lateral ( )

Instructs each lateral derived table depending on this QEP_TAB, to rematerialize itself before emitting rows.

◆ RefToString()

string RefToString ( const TABLE_REF ref,
const KEY key,
bool  include_nulls 
)

◆ remove_dup_with_compare()

static bool remove_dup_with_compare ( THD thd,
TABLE entry,
Field **  field,
ulong  offset,
Item having 
)
static

◆ remove_dup_with_hash_index()

static bool remove_dup_with_hash_index ( THD thd,
TABLE table,
Field **  first_field,
const size_t *  field_lengths,
size_t  key_length,
Item having 
)
static

Generate a hash index for each row to quickly find duplicate rows.

Note
Note that this will not work on tables with blobs!

◆ remove_duplicates()

bool QEP_TAB::remove_duplicates ( )

◆ report_handler_error()

int report_handler_error ( TABLE table,
int  error 
)

Helper function used when we get an error from the table handler.

◆ reset_framing_wf_states()

static void reset_framing_wf_states ( Func_ptr_array func_ptr)
inlinestatic

Walk the function calls and reset any framing window function's window state.

Parameters
func_ptran array of function call items which might represent or contain window function calls

◆ reset_non_framing_wf_state()

static void reset_non_framing_wf_state ( Func_ptr_array func_ptr)
inlinestatic

Walk the function calls and reset any non-framing window function's window state.

Parameters
func_ptran array of function call items which might represent or contain window function calls

◆ reset_wf_states()

static void reset_wf_states ( Func_ptr_array func_ptr,
bool  framing 
)
inlinestatic

Minion for reset_framing_wf_states and reset_non_framing_wf_state, q.v.

Parameters
func_ptrthe set of functions
framingtrue if we want to reset for framing window functions

◆ restore_fields()

void JOIN::restore_fields ( table_map  save_nullinfo)

Restore all result fields for all tables specified in save_nullinfo.

Parameters
save_nullinfoSet of tables for which restore is necessary.
Note
Const tables must have their NULL value flags restored,
See also
JOIN::clear_fields().

◆ restore_special_record()

void Window::restore_special_record ( uint64  special_rowno,
uchar record 
)

Restore row special_rowno into record from in-memory copy.

Any fields not the result of window functions are not used, but they do tag along here (unnecessary copying..). BLOBs: have storage in result_field of Item for the window function although the pointer is copied here. The result field storage is stable across reads from the frame buffer, so safe.

◆ return_zero_rows()

static void return_zero_rows ( JOIN join,
List< Item > &  fields 
)
static

For some reason, e.g.

due to an impossible WHERE clause, the tables cannot possibly contain any rows that will be in the result. This function is used to return with a result based on no matching rows (i.e., an empty result or one row with aggregates calculated without using rows in the case of implicit grouping) before the execution of nested loop join.

This function may evaluate the HAVING clause and is only meant for result sets that are empty due to an impossible HAVING clause. Do not use it if HAVING has already been evaluated.

Parameters
joinThe join that does not produce a row
fieldsFields in result

◆ rollup_send_data()

bool JOIN::rollup_send_data ( uint  idx)

Send all rollup levels higher than the current one to the client.

SAMPLE

SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP
Parameters
idxLevel we are on:
  • 0 = Total sum level
  • 1 = First group changed (a)
  • 2 = Second group changed (a,b)
Returns
false if success, true if error

◆ rollup_write_data()

bool JOIN::rollup_write_data ( uint  idx,
QEP_TAB qep_tab 
)

Write all rollup levels higher than the current one to a temp table.

SAMPLE

SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP
Parameters
idxLevel we are on:
  • 0 = Total sum level
  • 1 = First group changed (a)
  • 2 = Second group changed (a,b)
qep_tabtemp table
Returns
false if success, true if error

◆ safe_index_read()

int safe_index_read ( QEP_TAB tab)

◆ save_special_record()

void Window::save_special_record ( uint64  special_rowno,
TABLE t 
)

Save row special_rowno in table t->record[0] to an in-memory copy for later restoration.

◆ set_record_buffer()

bool set_record_buffer ( const QEP_TAB tab)

Allocate a data buffer that the storage engine can use for fetching batches of records.

A buffer is only allocated if ha_is_record_buffer_wanted() returns true for the handler, and the scan in question is of a kind that could be expected to benefit from fetching records in batches.

Parameters
tabthe table to read
Return values
trueif an error occurred when allocating the buffer
falseif a buffer was successfully allocated, or if a buffer was not attempted allocated

◆ SetCostOnHashJoinIterator()

void SetCostOnHashJoinIterator ( const Cost_model_server cost_model,
const POSITION pos_right,
RowIterator iterator 
)

◆ SetCostOnNestedLoopIterator()

void SetCostOnNestedLoopIterator ( const Cost_model_server cost_model,
const POSITION pos_right,
RowIterator iterator 
)

◆ SetCostOnTableIterator()

void SetCostOnTableIterator ( const Cost_model_server cost_model,
const POSITION pos,
bool  is_after_filter,
RowIterator iterator 
)
static

◆ setup_copy_fields()

bool setup_copy_fields ( List< Item > &  all_fields,
size_t  num_select_elements,
THD thd,
Temp_table_param param,
Ref_item_array  ref_item_array,
List< Item > *  res_selected_fields,
List< Item > *  res_all_fields 
)

Sets up caches for holding the values of non-aggregated expressions.

The values are saved at the start of every new group.

This code path is used in the cases when aggregation can be performed without a temporary table. Why it still uses a Temp_table_param is a mystery.

Only FIELD_ITEM:s and FUNC_ITEM:s needs to be saved between groups. Change old item_field to use a new field with points at saved fieldvalue This function is only called before use of send_result_set_metadata.

Parameters
all_fieldsall fields list; should really be const, but Item does not always respect constness
num_select_elementsnumber of elements in select item list
thdTHD pointer
[in,out]paramtemporary table parameters
[out]ref_item_arrayarray of pointers to top elements of field list
[out]res_selected_fieldsnew list of items of select item list
[out]res_all_fieldsnew list of all items
Returns
false if success, true if error

◆ setup_sum_funcs()

bool setup_sum_funcs ( THD thd,
Item_sum **  func_ptr 
)

Call setup() for all sum functions.

Parameters
thdthread handler
func_ptrsum function list
Return values
falseok
trueerror

◆ setup_tmptable_write_func()

void setup_tmptable_write_func ( QEP_TAB tab,
Opt_trace_object trace 
)

Setup write_func of QEP_tmp_table object.

Parameters
tabQEP_TAB of a tmp table
traceOpt_trace_object to add to

Function sets up write_func according to how QEP_tmp_table object that is attached to the given join_tab will be used in the query.

◆ SplitConditions()

void SplitConditions ( Item condition,
vector< Item * > *  predicates_below_join,
vector< PendingCondition > *  predicates_above_join 
)

◆ sub_select()

enum_nested_loop_state sub_select ( JOIN join,
QEP_TAB *const  qep_tab,
bool  end_of_records 
)

Retrieve records ending with a given beginning (partial join record) from the result of a join.

For a given partial join record consisting of records from the tables preceding the table join_tab in the execution plan, the function retrieves all matching full records from the result set and send them to the result set stream.

Note
The function effectively implements the final (n-k) nested loops of nested loops join algorithm, where k is the ordinal number of the join_tab table and n is the total number of tables in the join query. It performs nested loops joins with all conjunctive predicates from the where condition pushed as low to the tables as possible. E.g. for the query
SELECT * FROM t1,t2,t3
WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9
the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1, given the selected plan prescribes to nest retrievals of the joined tables in the following order: t1,t2,t3. A pushed down predicate are attached to the table which it pushed to, at the field join_tab->cond. When executing a nested loop of level k the function runs through the rows of 'join_tab' and for each row checks the pushed condition attached to the table. If it is false the function moves to the next row of the table. If the condition is true the function recursively executes (n-k-1) remaining embedded nested loops. The situation becomes more complicated if outer joins are involved in the execution plan. In this case the pushed down predicates can be checked only at certain conditions. Suppose for the query
SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a
WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL)
the optimizer has chosen a plan with the table order t1,t2,t3. The predicate P1=t1>2 will be pushed down to the table t1, while the predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table t2. But the second predicate can not be unconditionally tested right after a row from t2 has been read. This can be done only after the first row with t3.a=t1.a has been encountered. Thus, the second predicate P2 is supplied with a guard value that is stored in the field 'found' of the first inner table for the outer join (table t2). When the first row with t3.a=t1.a for the current row of table t1 appears, the value becomes true. From now on the predicate is evaluated immediately after the row of table t2 has been read. When the first row with t3.a=t1.a has been encountered all conditions attached to the inner tables t2,t3 must be evaluated. Only when all of them are true is the row sent to the output stream. If not, the function returns to the lowest nest level that has a false attached condition. The predicates from on expressions are also pushed down. If in the above example the on expression were (t3.a=t1.a AND t2.a=t1.a), then t1.a=t2.a would be pushed down to table t2, and without any guard. If after the run through all rows of table t2, the first inner table for the outer join operation, it turns out that no matches are found for the current row of t1, then the current row from table t1 is complemented by nulls for t2 and t3. Then the pushed down predicates are checked for the composed row almost in the same way as it had been done for the first row with a match. The only difference is that the predicates from on expressions are not checked.
IMPLEMENTATION
The function forms output rows for a current partial join of k tables recursively. For each partial join record ending with a certain row from join_tab it calls sub_select that builds all possible matching tails from the result set. To be able to check predicates conditionally, items of the class Item_func_trig_cond are employed. An object of this class is constructed from an item of class COND and a pointer to a guarding boolean variable. When the value of the guard variable is true the value of the object is the same as the value of the predicate, otherwise it just returns true.

Testing predicates at the optimal time can be tricky, especially for outer joins. Consider the following query:

SELECT * FROM t1
LEFT JOIN
(t2 JOIN t3 ON t2.a=t3.a)
ON t1.a=t2.a
WHERE t2.b=5 OR t2.b IS NULL

(The OR ... IS NULL is solely so that the outer join can not be rewritten to an inner join.)

Suppose the chosen execution plan dictates the order t1,t2,t3, and suppose that we have found a row t1 and are scanning t2. We cannot filter rows from t2 as we see them, as the LEFT JOIN needs to know that there existed at least one (t2,t3) tuple matching t1, so that it should not synthesize a NULL-complemented row.

However, once we have a matching t3, we can activate the predicate (t2.b=5 OR t2.b IS NULL). (Note that it does not refer to t3 at all.) If it fails, we should immediately stop scanning t3 and go back to scanning t2 (or in general, arbitrarily early), which is done by setting the field 'return_tab' of the JOIN.

Now consider a similar but more complex case:

SELECT * FROM t1
LEFT JOIN
(t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a)
ON t4.a=t2.a
WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL)

In order not to re-evaluate the predicates that were already evaluated as attached pushed down predicates, a pointer to the first most inner unmatched table is maintained in join_tab->first_unmatched. Thus, when the first row from t5 with t5.a=t3.a is found this pointer for t5 is changed from t4 to t2.

STRUCTURE NOTES
join_tab->first_unmatched points always backwards to the first inner table of the embedding nested join, if any.
Parameters
joinpointer to the structure providing all context info for the query
qep_tabthe first next table of the execution plan to be retrieved
end_of_recordstrue when we need to perform final steps of retrieval
Returns
return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.

◆ sub_select_op()

enum_nested_loop_state sub_select_op ( JOIN join,
QEP_TAB qep_tab,
bool  end_of_records 
)

Accumulate full or partial join result in operation and send operation's result further.

Parameters
joinpointer to the structure providing all context info for the query
qep_tabthe QEP_TAB object to which the operation is attached
end_of_recordstrue <=> all records were accumulated, send them further

This function accumulates records, one by one, in QEP operation's buffer by calling op->put_record(). When there is no more records to save, in this case the end_of_records argument == true, function tells QEP operation to send records further by calling op->send_records(). When all records are sent this function passes 'end_of_records' signal further by calling sub_select() with end_of_records argument set to true. After that op->end_send() is called to tell QEP operation that it could end internal buffer scan.

Note
This function is not expected to be called when dynamic range scan is used to scan join_tab because join cache is disabled for such scan and range scans aren't used for tmp tables.
See also
setup_join_buffering For caches the function implements the algorithmic schema for both Blocked Nested Loop Join and Batched Key Access Join. The difference can be seen only at the level of of the implementation of the put_record and send_records virtual methods for the cache object associated with the join_tab.
Returns
return one of enum_nested_loop_state.

◆ table_rec_cmp()

static bool table_rec_cmp ( TABLE table)
static

Compare GROUP BY values in tmp table's record[0] and record[1].

Returns
true records are different false records are the same

◆ unique_hash()

ulonglong unique_hash ( const Field field,
ulonglong hash_val 
)

Generate hash for a field.

Returns
generated hash

◆ unique_hash_fields()

static ulonglong unique_hash_fields ( TABLE table)
static

◆ unique_hash_group()

static ulonglong unique_hash_group ( ORDER group)
static

Generate hash for unique constraint according to group-by list.

This reads the values of the GROUP BY expressions from fields so assumes those expressions have been computed and stored into fields of a temporary table; in practice this means that copy_fields() and copy_funcs() must have been called.

◆ UnlockRow()

void EQRefIterator::UnlockRow ( )
overridevirtual

Since EQRefIterator may buffer a record, do not unlock it if it was not used in this invocation of EQRefIterator::Read().

Only count locks, thus remembering if the record was left unused, and unlock already when pruning the current value of TABLE_REF buffer.

See also
EQRefIterator::Read()

Implements RowIterator.

◆ update_const_equal_items()

static bool update_const_equal_items ( THD thd,
Item cond,
JOIN_TAB tab 
)
static

Check appearance of new constant items in multiple equalities of a condition after reading a constant table.

The function retrieves the cond condition and for each encountered multiple equality checks whether new constants have appeared after reading the constant (single row) table tab. If so it adjusts the multiple equality appropriately.

Parameters
thdthread handler
condcondition whose multiple equalities are to be checked
tabconstant table that has been read

◆ update_item_cache_if_changed()

int update_item_cache_if_changed ( List< Cached_item > &  list)

◆ update_sum_func()

bool update_sum_func ( Item_sum **  func_ptr)

◆ update_tmptable_sum_func()

void update_tmptable_sum_func ( Item_sum **  func_ptr,
TABLE tmp_table 
)

Update record 0 in tmp_table from record 1.

◆ use_order()

bool QEP_TAB::use_order ( ) const

Use ordering provided by chosen index?

Check if access to this JOIN_TAB has to retrieve rows in sorted order as defined by the ordered index used to access this table.

◆ write_or_send_row()

static enum_nested_loop_state write_or_send_row ( JOIN join,
QEP_TAB *const  qep_tab,
TABLE *const  table,
Temp_table_param *const  out_tbl 
)
inlinestatic

The last step in a series of windows do not need to write a tmp file if both a) and b) holds:

a) no SELECT DISTINCT b) no final ORDER BY

that have not been eliminated. If the condition is true, we send the data directly over the protocol to save the round trip to and from the tmp file

Variable Documentation

◆ MAX_RECORD_BUFFER_SIZE

constexpr size_t MAX_RECORD_BUFFER_SIZE = 128 * 1024
staticconstexpr

Maximum amount of space (in bytes) to allocate for a Record_buffer.

ROLLUP
Definition: sql_optimizer.h:83
NULL
#define NULL
Definition: types.h:55
JOIN
Definition: sql_optimizer.h:174
histograms::enum_operator::BETWEEN
@ BETWEEN
Ternary_option::ON
@ ON
consts::SELECT
const std::string SELECT("SELECT")
Name of the static privileges.