#include <hash_join_iterator.h>

Inheritance diagram for HashJoinIterator:

Public Member Functions
	HashJoinIterator (THD *thd, unique_ptr_destroy_only< RowIterator > build_input, const Prealloced_array< TABLE *, 4 > &build_input_tables, double estimated_build_rows, unique_ptr_destroy_only< RowIterator > probe_input, const Prealloced_array< TABLE *, 4 > &probe_input_tables, bool store_rowids, table_map tables_to_get_rowid_for, size_t max_memory_available, const std::vector< HashJoinCondition > &join_conditions, bool allow_spill_to_disk, JoinType join_type, const Mem_root_array< Item * > &extra_conditions, std::span< AccessPath * > single_row_index_lookups, HashJoinInput first_input, bool probe_input_batch_mode, uint64_t *hash_table_generation)
	Construct a HashJoinIterator. More...

bool	Init () override
	Initialize or reinitialize the iterator. More...

int	Read () override
	Read a single row. More...

void	SetNullRowFlag (bool is_null_row) override
	Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag is true, you'll get only NULLs no matter what is actually in the buffer (typically some old leftover row). More...

void	EndPSIBatchModeIfStarted () override
	Ends performance schema batch mode, if started. More...

void	UnlockRow () override

int	ChunkCount ()

Public Member Functions inherited from RowIterator
	RowIterator (THD *thd)

virtual	~RowIterator ()=default

	RowIterator (const RowIterator &)=delete

	RowIterator (RowIterator &&)=default

virtual const IteratorProfiler *	GetProfiler () const
	Get profiling data for this iterator (for 'EXPLAIN ANALYZE'). More...

virtual void	SetOverrideProfiler (const IteratorProfiler *profiler)

virtual void	StartPSIBatchMode ()
	Start performance schema batch mode, if supported (otherwise ignored). More...

virtual RowIterator *	real_iterator ()
	If this iterator is wrapping a different iterator (e.g. More...

virtual const RowIterator *	real_iterator () const

Static Public Attributes
static constexpr uint32_t	kChunkPartitioningHashSeed {899339}

static constexpr size_t	kMaxChunks = 128

Private Types
enum class	State { READING_ROW_FROM_PROBE_ITERATOR , READING_ROW_FROM_PROBE_CHUNK_FILE , READING_ROW_FROM_PROBE_ROW_SAVING_FILE , LOADING_NEXT_CHUNK_PAIR , READING_FIRST_ROW_FROM_HASH_TABLE , READING_FROM_HASH_TABLE , END_OF_ROWS }

enum class	HashJoinType { IN_MEMORY , SPILL_TO_DISK , IN_MEMORY_WITH_HASH_TABLE_REFILL }

Private Member Functions
bool	BuildHashTable ()
	Read all rows from the build input and store the rows into the in-memory hash table. More...

bool	WriteBuildTableToChunkFiles ()
	Write all the remaining rows from the build table input to chunk files on disk. More...

bool	ReadNextHashJoinChunk ()
	Read all rows from the next chunk file into the in-memory hash table. More...

bool	ReadRowFromProbeIterator ()
	Read a single row from the probe iterator input into the tables' record buffers. More...

bool	ReadRowFromProbeChunkFile ()
	Read a single row from the current probe chunk file into the tables' record buffers. More...

bool	ReadRowFromProbeRowSavingFile ()
	Read a single row from the probe row saving file into the tables' record buffers. More...

void	LookupProbeRowInHashTable ()

int	ReadJoinedRow ()
	Take the next matching row from the hash table, and put the row into the build tables' record buffers. More...

bool	on_disk_hash_join () const

bool	WriteProbeRowToDiskIfApplicable ()
	Write the last row read from the probe input out to chunk files on disk, if applicable. More...

bool	JoinedRowPassesExtraConditions () const

bool	RejectDuplicateKeys () const
	If true, reject duplicate keys in the hash table. More...

bool	InitRowBuffer ()
	Clear the row buffer and reset all iterators pointing to it. More...

bool	InitProbeIterator ()
	Prepare to read the probe iterator from the beginning, and enable batch mode if applicable. More...

bool	InitWritingToProbeRowSavingFile ()
	Mark that probe row saving is enabled, and prepare the probe row saving file for writing. More...

bool	InitReadingFromProbeRowSavingFile ()
	Mark that we should read from the probe row saving file. More...

void	SetReadingProbeRowState ()
	Set the iterator state to the correct READING_ROW_FROM_PROBE_*-state. More...

int	ReadNextJoinedRowFromHashTable ()
	Read a joined row from the hash table, and see if it passes any extra conditions. More...

bool	ReadFirstProbeRow ()
	Helper function for Init(). More...

bool	InitHashTable ()
	Helper function for Init(). More...

Private Attributes
State	m_state

uint64_t *	m_hash_table_generation

uint64_t	m_last_hash_table_generation

const unique_ptr_destroy_only< RowIterator >	m_build_input

const unique_ptr_destroy_only< RowIterator >	m_probe_input

LinkedImmutableString	m_current_row {nullptr}

pack_rows::TableCollection	m_probe_input_tables

pack_rows::TableCollection	m_build_input_tables

hash_join_buffer::HashJoinRowBuffer	m_row_buffer

Prealloced_array< HashJoinCondition, 4 >	m_join_conditions

Mem_root_array< ChunkPair >	m_chunk_files_on_disk

int	m_current_chunk {-1}

ha_rows	m_build_chunk_current_row = 0

ha_rows	m_probe_chunk_current_row = 0

const double	m_estimated_build_rows

String	m_temporary_row_and_join_key_buffer

HashJoinInput	m_first_input
	The first input (build or probe) to read from. More...

bool	m_probe_input_batch_mode {false}

bool	m_allow_spill_to_disk {true}

bool	m_build_iterator_has_more_rows {true}

const JoinType	m_join_type

Item *	m_extra_condition {nullptr}

bool	m_write_to_probe_row_saving {false}

bool	m_read_from_probe_row_saving {false}

HashJoinChunk	m_probe_row_saving_write_file

HashJoinChunk	m_probe_row_saving_read_file

ha_rows	m_probe_row_saving_read_file_current_row {0}

std::span< AccessPath * >	m_single_row_index_lookups

HashJoinType	m_hash_join_type {HashJoinType::IN_MEMORY}

bool	m_probe_row_match_flag {false}

bool	m_probe_row_read {false}
	If true, a row was already read from the probe input, in order to check if that input was empty. More...

Additional Inherited Members
Protected Member Functions inherited from RowIterator
THD *	thd () const

Member Enumeration Documentation

◆ HashJoinType

enum class HashJoinIterator::HashJoinType

strong, private

Enumerator
IN_MEMORY
SPILL_TO_DISK
IN_MEMORY_WITH_HASH_TABLE_REFILL

◆ State

enum class HashJoinIterator::State

strong, private

Enumerator
READING_ROW_FROM_PROBE_ITERATOR
READING_ROW_FROM_PROBE_CHUNK_FILE
READING_ROW_FROM_PROBE_ROW_SAVING_FILE
LOADING_NEXT_CHUNK_PAIR
READING_FIRST_ROW_FROM_HASH_TABLE
READING_FROM_HASH_TABLE
END_OF_ROWS

Constructor & Destructor Documentation

◆ HashJoinIterator()

HashJoinIterator::HashJoinIterator	(	THD *	thd,
		unique_ptr_destroy_only< RowIterator >	build_input,
		const Prealloced_array< TABLE *, 4 > &	build_input_tables,
		double	estimated_build_rows,
		unique_ptr_destroy_only< RowIterator >	probe_input,
		const Prealloced_array< TABLE *, 4 > &	probe_input_tables,
		bool	store_rowids,
		table_map	tables_to_get_rowid_for,
		size_t	max_memory_available,
		const std::vector< HashJoinCondition > &	join_conditions,
		bool	allow_spill_to_disk,
		JoinType	join_type,
		const Mem_root_array< Item * > &	extra_conditions,
		std::span< AccessPath * >	single_row_index_lookups,
		HashJoinInput	first_input,
		bool	probe_input_batch_mode,
		uint64_t *	hash_table_generation
	)

Construct a HashJoinIterator.

Parameters

thd	the thread handle
build_input	the iterator for the build input
build_input_tables	a list of all the tables in the build input. The tables are needed for two things: 1) Accessing the columns when creating the join key during creation of the hash table, 2) and accessing the column data when creating the row to be stored in the hash table and/or the chunk file on disk.
estimated_build_rows	How many rows we assume there will be when reading the build input. This is used to choose how many chunks we break it into on disk.
probe_input	the iterator for the probe input
probe_input_tables	the probe input tables. Needed for the same reasons as build_input_tables.
store_rowids	whether we need to make sure row ids are available for all tables below us, after Read() has been called. Used only if we are below a weedout operation.
tables_to_get_rowid_for	a map of which tables we need to call position() for ourselves. Tables that are in build_input_tables but not in this map are expected to be handled by some other iterator. Tables that are in this map but not in build_input_tables will be ignored.
max_memory_available	the amount of memory available, in bytes, for this hash join iterator. This can be user-controlled by setting the system variable join_buffer_size.
join_conditions	a list of all the join conditions between the two inputs
allow_spill_to_disk	whether the hash join can spill to disk. This is set to false in some cases where we have a LIMIT in the query
join_type	The join type.
extra_conditions	A list of extra conditions that the iterator will evaluate after a lookup in the hash table is done, but before the row is returned. The conditions are AND-ed together into a single Item.
single_row_index_lookups	All the single-row index lookups in the build input and probe input.
first_input	The first input (build or probe) to read from. (If this is empty, we will not have to read from the other.)
probe_input_batch_mode	Whether we need to enable batch mode on the probe input table. Only makes sense if it is a single table, and we are not on the outer side of any nested loop join.
hash_table_generation	If this is non-nullptr, it is a counter of how many times the query block the iterator is a part of has been asked to clear hash tables, since outer references may have changed value. It is used to know when we need to drop our hash table; when the value changes, we need to drop it. If it is nullptr, we always drop it on Init().

Member Function Documentation

◆ BuildHashTable()

bool HashJoinIterator::BuildHashTable ( )

private

Read all rows from the build input and store the rows into the in-memory hash table.

If the hash table becomes full, the rest of the rows are written out to chunk files on disk. See the class comment for more details.

Return values

true	in case of error

◆ ChunkCount()

int HashJoinIterator::ChunkCount ( )

inline

◆ EndPSIBatchModeIfStarted()

void HashJoinIterator::EndPSIBatchModeIfStarted ( )

inline, override, virtual

Ends performance schema batch mode, if started.

It's always safe to call this.

Iterators that have children (composite iterators) must forward the EndPSIBatchModeIfStarted() call to every iterator they could conceivably have called StartPSIBatchMode() on. This ensures that after such a call on the root iterator, all handlers are out of batch mode.

Reimplemented from RowIterator.

◆ Init()

bool HashJoinIterator::Init ( )

override, virtual

Initialize or reinitialize the iterator.

You must always call Init() before trying a Read() (but Init() does not imply Read()).

You can call Init() multiple times; subsequent calls will rewind the iterator (or reposition it, depending on whether the iterator takes in e.g. an Index_lookup) and allow you to read the records anew.

Implements RowIterator.

◆ InitHashTable()

bool HashJoinIterator::InitHashTable ( )

private

Helper function for Init().

Build the hash table and check for empty query results (empty build input or non-empty build input in case of degenerate antijoin.)

Returns: 'true' in case of error.

◆ InitProbeIterator()

bool HashJoinIterator::InitProbeIterator ( )

private

Prepare to read the probe iterator from the beginning, and enable batch mode if applicable.

The iterator state will remain unchanged.

Return values

true	in case of error. my_error has been called.

◆ InitReadingFromProbeRowSavingFile()

bool HashJoinIterator::InitReadingFromProbeRowSavingFile ( )

private

Mark that we should read from the probe row saving file.

The probe row saving file is rewound to the beginning.

See also: m_read_from_probe_row_saving

Return values

true	in case of error. my_error has been called.

◆ InitRowBuffer()

bool HashJoinIterator::InitRowBuffer ( )

private

Clear the row buffer and reset all iterators pointing to it.

This may be called multiple times to re-init the row buffer.

Return values

true	in case of error. my_error has been called

◆ InitWritingToProbeRowSavingFile()

bool HashJoinIterator::InitWritingToProbeRowSavingFile ( )

private

Mark that probe row saving is enabled, and prepare the probe row saving file for writing.

See also: m_write_to_probe_row_saving

Return values

true	in case of error. my_error has been called.

◆ JoinedRowPassesExtraConditions()

bool HashJoinIterator::JoinedRowPassesExtraConditions ( ) const

private

Return values

true	if the last joined row passes all of the extra conditions.

◆ LookupProbeRowInHashTable()

void HashJoinIterator::LookupProbeRowInHashTable ( )

private

◆ on_disk_hash_join()

bool HashJoinIterator::on_disk_hash_join ( ) const

inline, private

◆ Read()

int HashJoinIterator::Read ( )

override, virtual

Read a single row.

The row data is not actually returned from the function; it is put in the table's (or tables', in case of a join) record buffer, i.e., table->records[0].

Return values

0	OK
-1	End of records
1	Error

Implements RowIterator.

◆ ReadFirstProbeRow()

bool HashJoinIterator::ReadFirstProbeRow ( )

private

Helper function for Init().

Read the first row from m_probe_input.

Returns: 'true' if there was an error.

◆ ReadJoinedRow()

int HashJoinIterator::ReadJoinedRow ( )

private

Take the next matching row from the hash table, and put the row into the build tables' record buffers.

The function expects that LookupProbeRowInHashTable() has been called up-front. The user must call ReadJoinedRow() as long as it returns 0, as there may be multiple matching rows from the hash table. It is up to the caller to set a new state in case of EOF.

Return values

0	if a match was found and the row is put in the build tables' record buffers
-1	if there are no more matching rows in the hash table

◆ ReadNextHashJoinChunk()

bool HashJoinIterator::ReadNextHashJoinChunk ( )

private

Read all rows from the next chunk file into the in-memory hash table.

See the class comment for details.

Return values

true	in case of error

◆ ReadNextJoinedRowFromHashTable()

int HashJoinIterator::ReadNextJoinedRowFromHashTable ( )

private

Read a joined row from the hash table, and see if it passes any extra conditions.

The last probe row read will also be written to disk if needed (see WriteProbeRowToDiskIfApplicable).

Return values

-1	There are no more matching rows in the hash table.
0	A joined row is ready.
1	An error occurred.

◆ ReadRowFromProbeChunkFile()

bool HashJoinIterator::ReadRowFromProbeChunkFile ( )

private

Read a single row from the current probe chunk file into the tables' record buffers.

The end conditions are the same as for ReadRowFromProbeIterator().

Return values

true	in case of error

◆ ReadRowFromProbeIterator()

bool HashJoinIterator::ReadRowFromProbeIterator ( )

private

Read a single row from the probe iterator input into the tables' record buffers.

If we have started spilling to disk, the row is written out to a chunk file on disk as well.

The end condition is that either: a) a row is ready in the tables' record buffers, and the state will be set to READING_FIRST_ROW_FROM_HASH_TABLE. b) There are no more rows to process from the probe input, so the iterator state will be LOADING_NEXT_CHUNK_PAIR.

Return values

true	in case of error

◆ ReadRowFromProbeRowSavingFile()

bool HashJoinIterator::ReadRowFromProbeRowSavingFile ( )

private

Read a single row from the probe row saving file into the tables' record buffers.

Return values

true	in case of error

◆ RejectDuplicateKeys()

bool HashJoinIterator::RejectDuplicateKeys ( ) const

inline, private

If true, reject duplicate keys in the hash table.

Semijoins/antijoins are only interested in the first matching row from the hash table, so we can avoid storing duplicate keys in order to save some memory. However, this cannot be applied if we have any "extra" conditions: the first matching row in the hash table may fail the extra condition(s).

Return values

true	if we can reject duplicate keys in the hash table.

◆ SetNullRowFlag()

void HashJoinIterator::SetNullRowFlag ( bool is_null_row )

inline, override, virtual

Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag is true, you'll get only NULLs no matter what is actually in the buffer (typically some old leftover row).

This is used for outer joins, when an iterator hasn't produced any rows and we need to produce a NULL-complemented row. Init() or Read() won't necessarily reset this flag, so if you ever set it to true, make sure to also set it to false when needed.

Note that this can be called without Init() having been called first. For example, NestedLoopIterator can hit EOF immediately on the outer iterator, which means the inner iterator doesn't get an Init() call, but will still forward SetNullRowFlag to both inner and outer iterators.

TODO: We shouldn't need this. See the comments on AggregateIterator for a bit more discussion on abstracting out a row interface.

Implements RowIterator.

◆ SetReadingProbeRowState()

void HashJoinIterator::SetReadingProbeRowState ( )

private

Set the iterator state to the correct READING_ROW_FROM_PROBE_*-state.

Which state we end up in depends on which hash join type we are executing (in-memory, on-disk or in-memory with hash table refill).

◆ UnlockRow()

void HashJoinIterator::UnlockRow ( )

inline, override, virtual

Implements RowIterator.

◆ WriteBuildTableToChunkFiles()

bool HashJoinIterator::WriteBuildTableToChunkFiles ( )

private

Write all the remaining rows from the build table input to chunk files on disk.

Returns: True on error, false on success.

◆ WriteProbeRowToDiskIfApplicable()

bool HashJoinIterator::WriteProbeRowToDiskIfApplicable ( )

private

Write the last row read from the probe input out to chunk files on disk, if applicable.

For inner joins, we must write all probe rows to chunk files, since we need to match the row against rows from the build input that are written out to chunk files. For semijoin, we can only write probe rows that do not match any of the rows in the hash table. Writing a probe row with a matching row in the hash table could cause the row to be returned multiple times.

Return values

true	in case of errors.

Member Data Documentation

◆ kChunkPartitioningHashSeed

constexpr uint32_t HashJoinIterator::kChunkPartitioningHashSeed {899339}

static, constexpr

◆ kMaxChunks

constexpr size_t HashJoinIterator::kMaxChunks = 128

static, constexpr

◆ m_allow_spill_to_disk

bool HashJoinIterator::m_allow_spill_to_disk {true}

private

◆ m_build_chunk_current_row

ha_rows HashJoinIterator::m_build_chunk_current_row = 0

private

◆ m_build_input

const unique_ptr_destroy_only<RowIterator> HashJoinIterator::m_build_input

private

◆ m_build_input_tables

pack_rows::TableCollection HashJoinIterator::m_build_input_tables

private

◆ m_build_iterator_has_more_rows

bool HashJoinIterator::m_build_iterator_has_more_rows {true}

private

◆ m_chunk_files_on_disk

Mem_root_array<ChunkPair> HashJoinIterator::m_chunk_files_on_disk

private

◆ m_current_chunk

int HashJoinIterator::m_current_chunk {-1}

private

◆ m_current_row

LinkedImmutableString HashJoinIterator::m_current_row {nullptr}

private

◆ m_estimated_build_rows

const double HashJoinIterator::m_estimated_build_rows

private

◆ m_extra_condition

Item* HashJoinIterator::m_extra_condition {nullptr}

private

◆ m_first_input

HashJoinInput HashJoinIterator::m_first_input

private

The first input (build or probe) to read from.

(If this is empty, we will not have to read from the other.)

◆ m_hash_join_type

HashJoinType HashJoinIterator::m_hash_join_type {HashJoinType::IN_MEMORY}

private

◆ m_hash_table_generation

uint64_t* HashJoinIterator::m_hash_table_generation

private

◆ m_join_conditions

Prealloced_array<HashJoinCondition, 4> HashJoinIterator::m_join_conditions

private

◆ m_join_type

const JoinType HashJoinIterator::m_join_type

private

◆ m_last_hash_table_generation

uint64_t HashJoinIterator::m_last_hash_table_generation

private

◆ m_probe_chunk_current_row

ha_rows HashJoinIterator::m_probe_chunk_current_row = 0

private

◆ m_probe_input

const unique_ptr_destroy_only<RowIterator> HashJoinIterator::m_probe_input

private

◆ m_probe_input_batch_mode

bool HashJoinIterator::m_probe_input_batch_mode {false}

private

◆ m_probe_input_tables

pack_rows::TableCollection HashJoinIterator::m_probe_input_tables

private

◆ m_probe_row_match_flag

bool HashJoinIterator::m_probe_row_match_flag {false}

private

◆ m_probe_row_read

bool HashJoinIterator::m_probe_row_read {false}

private

If true, a row was already read from the probe input, in order to check if that input was empty.

If so, we should process that row before reading another.

◆ m_probe_row_saving_read_file

HashJoinChunk HashJoinIterator::m_probe_row_saving_read_file

private

◆ m_probe_row_saving_read_file_current_row

ha_rows HashJoinIterator::m_probe_row_saving_read_file_current_row {0}

private

◆ m_probe_row_saving_write_file

HashJoinChunk HashJoinIterator::m_probe_row_saving_write_file

private

◆ m_read_from_probe_row_saving

bool HashJoinIterator::m_read_from_probe_row_saving {false}

private

◆ m_row_buffer

hash_join_buffer::HashJoinRowBuffer HashJoinIterator::m_row_buffer

private

◆ m_single_row_index_lookups

std::span<AccessPath *> HashJoinIterator::m_single_row_index_lookups

private

◆ m_state

State HashJoinIterator::m_state

private

◆ m_temporary_row_and_join_key_buffer

String HashJoinIterator::m_temporary_row_and_join_key_buffer

private

◆ m_write_to_probe_row_saving

bool HashJoinIterator::m_write_to_probe_row_saving {false}

private

The documentation for this class was generated from the following files:

sql/iterators/hash_join_iterator.h
sql/iterators/hash_join_iterator.cc

Public Member Functions

Static Public Attributes

Private Types

Private Member Functions

Private Attributes

Additional Inherited Members

Member Enumeration Documentation

◆ HashJoinType

◆ State

Constructor & Destructor Documentation

◆ HashJoinIterator()

Member Function Documentation

◆ BuildHashTable()

◆ ChunkCount()

◆ EndPSIBatchModeIfStarted()

◆ Init()

◆ InitHashTable()

◆ InitProbeIterator()

◆ InitReadingFromProbeRowSavingFile()

◆ InitRowBuffer()

◆ InitWritingToProbeRowSavingFile()

◆ JoinedRowPassesExtraConditions()

◆ LookupProbeRowInHashTable()

◆ on_disk_hash_join()

◆ Read()

◆ ReadFirstProbeRow()

◆ ReadJoinedRow()

◆ ReadNextHashJoinChunk()

◆ ReadNextJoinedRowFromHashTable()

◆ ReadRowFromProbeChunkFile()

◆ ReadRowFromProbeIterator()

◆ ReadRowFromProbeRowSavingFile()

◆ RejectDuplicateKeys()

◆ SetNullRowFlag()

◆ SetReadingProbeRowState()

◆ UnlockRow()

◆ WriteBuildTableToChunkFiles()

◆ WriteProbeRowToDiskIfApplicable()

Member Data Documentation

◆ kChunkPartitioningHashSeed

◆ kMaxChunks

◆ m_allow_spill_to_disk

◆ m_build_chunk_current_row

◆ m_build_input

◆ m_build_input_tables

◆ m_build_iterator_has_more_rows

◆ m_chunk_files_on_disk

◆ m_current_chunk

◆ m_current_row

◆ m_estimated_build_rows

◆ m_extra_condition

◆ m_first_input

◆ m_hash_join_type

◆ m_hash_table_generation

◆ m_join_conditions

◆ m_join_type

◆ m_last_hash_table_generation

◆ m_probe_chunk_current_row

◆ m_probe_input

◆ m_probe_input_batch_mode

◆ m_probe_input_tables

◆ m_probe_row_match_flag

◆ m_probe_row_read

◆ m_probe_row_saving_read_file

◆ m_probe_row_saving_read_file_current_row

◆ m_probe_row_saving_write_file

◆ m_read_from_probe_row_saving

◆ m_row_buffer

◆ m_single_row_index_lookups

◆ m_state

◆ m_temporary_row_and_join_key_buffer

◆ m_write_to_probe_row_saving