MySQL 8.3.0
Source Code Documentation File Reference

Standard external sort. More...

#include "sql/filesort.h"
#include <limits.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <atomic>
#include <memory>
#include <new>
#include <optional>
#include <vector>
#include "add_with_saturate.h"
#include "decimal.h"
#include "field_types.h"
#include "map_helpers.h"
#include "my_basename.h"
#include "my_bitmap.h"
#include "my_byteorder.h"
#include "my_compiler.h"
#include "my_config.h"
#include "my_dbug.h"
#include "my_inttypes.h"
#include "my_sys.h"
#include "mysql/components/services/log_builtins.h"
#include "mysql/components/services/log_shared.h"
#include "mysql/my_loglevel.h"
#include "mysql/psi/mysql_file.h"
#include "mysql/service_mysql_alloc.h"
#include "mysql/strings/m_ctype.h"
#include "mysql/udf_registration_types.h"
#include "mysql_com.h"
#include "mysqld_error.h"
#include "priority_queue.h"
#include "sql-common/json_dom.h"
#include "sql-common/my_decimal.h"
#include "sql/auth/sql_security_ctx.h"
#include "sql/bounded_queue.h"
#include "sql/cmp_varlen_keys.h"
#include "sql/debug_sync.h"
#include "sql/derror.h"
#include "sql/error_handler.h"
#include "sql/field.h"
#include "sql/filesort_utils.h"
#include "sql/handler.h"
#include "sql/item.h"
#include "sql/item_subselect.h"
#include "sql/iterators/row_iterator.h"
#include "sql/iterators/sorting_iterator.h"
#include "sql/key_spec.h"
#include "sql/malloc_allocator.h"
#include "sql/merge_many_buff.h"
#include "sql/mysqld.h"
#include "sql/opt_costmodel.h"
#include "sql/opt_trace.h"
#include "sql/opt_trace_context.h"
#include "sql/pfs_batch_mode.h"
#include "sql/psi_memory_key.h"
#include "sql/sort_param.h"
#include "sql/sql_array.h"
#include "sql/sql_base.h"
#include "sql/sql_bitmap.h"
#include "sql/sql_class.h"
#include "sql/sql_const.h"
#include "sql/sql_error.h"
#include "sql/sql_lex.h"
#include "sql/sql_optimizer.h"
#include "sql/sql_sort.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/thr_malloc.h"
#include "sql/tztime.h"
#include "sql_string.h"
#include "template_utils.h"


struct  anonymous_namespace{}::Mem_compare_queue_key
class  Filesort_error_handler
 Error handler for filesort. More...


namespace  anonymous_namespace{}


static ha_rows read_all_rows (THD *thd, Sort_param *param, const Mem_root_array< TABLE * > &tables, table_map tables_to_get_rowid_for, Filesort_info *fs_info, IO_CACHE *chunk_file, IO_CACHE *tempfile, Bounded_queue< uchar *, uchar *, Sort_param, Mem_compare_queue_key > *pq, RowIterator *source_iterator, ha_rows *found_rows, size_t *longest_key, size_t *longest_addons)
 Read all rows, and write them into a temporary file (if we run out of space in the sort buffer). More...
static int write_keys (Sort_param *param, Filesort_info *fs_info, uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile)
static int merge_index (THD *thd, Sort_param *param, Sort_buffer sort_buffer, Merge_chunk_array chunk_array, IO_CACHE *tempfile, IO_CACHE *outfile)
static bool save_index (Sort_param *param, uint count, Filesort_info *table_sort, Sort_result *sort_result)
 This function is used only if the entire result set fits in memory. More...
static bool check_if_pq_applicable (Opt_trace_context *trace, Sort_param *param, Filesort_info *filesort_info, ha_rows num_rows, ulong memory_available)
 Test whether priority queue is worth using to get top elements of an ordered result set. More...
static void trace_filesort_information (Opt_trace_context *trace, const st_sort_field *sortorder, uint s_length)
bool filesort (THD *thd, Filesort *filesort, RowIterator *source_iterator, table_map tables_to_get_rowid_for, ha_rows num_rows_estimate, Filesort_info *fs_info, Sort_result *sort_result, ha_rows *found_rows)
 Sort a table. More...
void filesort_free_buffers (TABLE *table, bool full)
static void dbug_print_record (TABLE *table, bool print_rowid)
static bool alloc_and_make_sortkey (Sort_param *param, Filesort_info *fs_info, const Mem_root_array< TABLE * > &tables, size_t *key_length, size_t *longest_addons)
static NO_INLINE uint make_json_sort_key (Item *item, uchar *to, uchar *null_indicator, size_t length, ulonglong *hash)
 Make a sort key for the JSON value in an Item. More...
bool anonymous_namespace{}::write_uint8_overflows (uint8_t val, uchar *to_end, uchar **to)
bool anonymous_namespace{}::clear_overflows (size_t num_bytes, uchar *to_end, uchar **to)
bool anonymous_namespace{}::advance_overflows (size_t num_bytes, uchar *to_end, uchar **to)
size_t anonymous_namespace{}::make_sortkey_from_item (Item *item, Item_result result_type, std::optional< size_t > dst_length, String *tmp_buffer, uchar *to, uchar *to_end, bool *maybe_null, ulonglong *hash)
static uint read_to_buffer (IO_CACHE *fromfile, Merge_chunk *merge_chunk, Sort_param *param)
 Read from a disk file into the merge chunk's buffer. More...
static int copy_bytes (IO_CACHE *to_file, IO_CACHE *from_file, size_t count, my_off_t offset)
 Copy “count” bytes from one file, starting at offset “offset”, to the current write position (usually the end) of the other. More...
static int copy_row (IO_CACHE *to_file, IO_CACHE *from_file, Merge_chunk *merge_chunk, uint offset, uint bytes_to_write)
 Copy a row from “from_file” to “to_file” (this is used during merging). More...
static int merge_buffers (THD *thd, Sort_param *param, IO_CACHE *from_file, IO_CACHE *to_file, Sort_buffer sort_buffer, Merge_chunk *last_chunk, Merge_chunk_array chunk_array, bool include_keys)
 Merge buffers to one buffer. More...
uint sortlength (THD *thd, st_sort_field *sortorder, uint s_length)
 Calculate length of sort key. More...
bool SortWillBeOnRowId (const TABLE *table)
void change_double_for_sort (double nr, uchar *to)

Detailed Description

Standard external sort.

We read rows into a buffer until there's no more room. At that point, we use it (using the sorting algorithms from STL), and write it to disk (thus the name “filesort”). When there are no more rows, we merge chunks recursively, seven and seven (although we can go all the way up to 15 in the final pass if it helps us do one pass less).

If all the rows fit in a single chunk, the data never hits disk, but remains in RAM.

Function Documentation

◆ alloc_and_make_sortkey()

static bool alloc_and_make_sortkey ( Sort_param param,
Filesort_info fs_info,
const Mem_root_array< TABLE * > &  tables,
size_t *  key_length,
size_t *  longest_addons 

◆ change_double_for_sort()

void change_double_for_sort ( double  nr,
uchar to 

◆ check_if_pq_applicable()

bool check_if_pq_applicable ( Opt_trace_context trace,
Sort_param param,
Filesort_info filesort_info,
ha_rows  num_rows,
ulong  memory_available 

Test whether priority queue is worth using to get top elements of an ordered result set.

If it is, then allocates buffer for required amount of records

trace — Current trace context.
param — Sort parameters.
filesort_info — Filesort information.
num_rows — Estimate of number of rows in source record set.
memory_available — Memory available for sorting.

DESCRIPTION Given a query like this: SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows; This function tests whether a priority queue can be used to keep the result (ie., there is enough memory to store <max_rows> rows).

Returns true if it is OK to use the priority queue; returns false otherwise, or when there is not enough memory.

◆ copy_bytes()

static int copy_bytes ( IO_CACHE to_file,
IO_CACHE from_file,
size_t  count,
my_off_t  offset 

Copy “count” bytes from one file, starting at offset “offset”, to the current write position (usually the end) of the other.

◆ copy_row()

static int copy_row ( IO_CACHE to_file,
IO_CACHE from_file,
Merge_chunk merge_chunk,
uint  offset,
uint  bytes_to_write 

Copy a row from “from_file” to “to_file” (this is used during merging).

Most commonly, we'll already have all (or most) of it in memory, as indicated by “merge_chunk”, which must be positioned on the row. But in very tight circumstances (see read_to_buffer()), some of it may still be on disk, and will need to be copied directly from file to file.

◆ dbug_print_record()

static void dbug_print_record ( TABLE table,
bool  print_rowid 

◆ filesort()

bool filesort ( THD thd,
Filesort filesort,
RowIterator source_iterator,
table_map  tables_to_get_rowid_for,
ha_rows  num_rows_estimate,
Filesort_info fs_info,
Sort_result sort_result,
ha_rows found_rows 

Sort a table.

Creates a set of pointers that can be used to read the rows in sorted order. This should be done with the sorting iterator functions (see sql/iterators/sorting_iterator.h; the original cross-reference was lost in extraction).

The result set is stored in fs_info->io_cache or fs_info->sorted_result, or left in the main filesort buffer.

thd — Current thread.
filesort — How to sort the table.
source_iterator — Where to read the rows to be sorted from.
tables_to_get_rowid_for — Which tables we are responsible for getting row IDs for. Tables in this set that are not also in "tables" are ignored.
num_rows_estimate — How many rows source_iterator is expected to produce. Only used for whether we intend to use the priority queue optimization or not; if we estimate fewer rows than we can fit into RAM, we never use the priority queue.
fs_info — Owns the buffers for sort_result.
sort_result — Where to store the sort result.
[out] found_rows — Store the number of found rows here. This is the number of found rows after applying the WHERE condition.
If we sort row IDs (as opposed to addon fields), filesort() will call table->prepare_for_position().
Returns false on success, true on error.

◆ filesort_free_buffers()

void filesort_free_buffers ( TABLE table,
bool  full 

◆ make_json_sort_key()

static NO_INLINE uint make_json_sort_key ( Item item,
uchar to,
uchar null_indicator,
size_t  length,
ulonglong hash 

Make a sort key for the JSON value in an Item.

This function is called by Sort_param::make_sortkey(). We don't want it to be inlined, since that seemed to have a negative impact on some performance tests.

[in] item — The item for which to create a sort key.
[out] to — Pointer into the buffer to which the sort key should be written. It will point to where the data portion of the key should start.
[out] null_indicator — For nullable items, the NULL indicator byte. (Ignored otherwise.) Should be initialized by the caller to a value that indicates not NULL.
[in] length — The length of the sort key, not including the NULL indicator byte at the beginning of the sort key for nullable items.
[in,out] hash — The hash key of the JSON values in the current row.
Returns the length of the key stored.

◆ merge_buffers()

static int merge_buffers ( THD thd,
Sort_param param,
IO_CACHE from_file,
IO_CACHE to_file,
Sort_buffer  sort_buffer,
Merge_chunk last_chunk,
Merge_chunk_array  chunk_array,
bool  include_keys 

Merge buffers to one buffer.

thd — Thread context.
param — Sort parameter.
from_file — File with source data (Merge_chunks point to this file).
to_file — File to write the sorted result data.
sort_buffer — Buffer for data to store up to MERGEBUFF2 sort keys.
[out] last_chunk — Store here the Merge_chunk describing data written to to_file.
chunk_array — Array of chunks to merge.
include_keys — If true, write both the keys and the addons / row positions. If false, the keys will be skipped (useful only for the output of the final merge, where we don't need to compare rows further).
Returns 0 on success; another value on error.

◆ merge_index()

static int merge_index ( THD thd,
Sort_param param,
Sort_buffer  sort_buffer,
Merge_chunk_array  chunk_array,
IO_CACHE tempfile,
IO_CACHE outfile 

◆ read_all_rows()

static ha_rows read_all_rows ( THD thd,
Sort_param param,
const Mem_root_array< TABLE * > &  tables,
table_map  tables_to_get_rowid_for,
Filesort_info fs_info,
IO_CACHE chunk_file,
IO_CACHE tempfile,
Bounded_queue< uchar *, uchar *, Sort_param, Mem_compare_queue_key > *  pq,
RowIterator source_iterator,
ha_rows found_rows,
size_t *  longest_key,
size_t *  longest_addons 

Read all rows, and write them into a temporary file (if we run out of space in the sort buffer).

All produced sequences are guaranteed to be non-empty.

thd — Thread handle.
param — Sorting parameter.
tables — List of all tables being sorted.
tables_to_get_rowid_for — Which tables we are responsible for getting row IDs for. Tables in this set that are not also in "tables" are ignored.
fs_info — Struct containing sort buffer etc.
chunk_file — File to write Merge_chunks describing sorted segments in tempfile.
tempfile — File to write sorted sequences of sortkeys to.
pq — If non-NULL, use it for keeping top-N elements.
source_iterator — Where to read the rows to be sorted from.
[out] found_rows — The number of FOUND_ROWS(). For a query with LIMIT, this value will typically be larger than the function return value.
[out] longest_key — The largest single key found, in bytes.
[out] longest_addons — The longest addon field row (sum of all addon fields for any single given row) found.
Basic idea:
   while (get_next_sortkey())
     if (using priority queue)
       push sort key into queue
       try to put sort key into buffer;
       if (no free space in sort buffer)
         do {
           allocate new, larger buffer;
           retry putting sort key into buffer;
         } until (record fits or no space for new buffer)
         if (no space for new buffer)
           sort record pointers (all buffers);
           dump sorted sequence to 'tempfile';
           dump Merge_chunk describing sequence location into 'chunk_file';
       if (key was packed)
         tell sort buffer the actual number of bytes used;
   if (buffer has some elements && dumped at least once)
     sort-dump-dump as above;
   else
     don't sort, leave sort buffer to be sorted by caller.
Returns the number of records written on success.
Returns HA_POS_ERROR on error.

◆ read_to_buffer()

static uint read_to_buffer ( IO_CACHE fromfile,
Merge_chunk merge_chunk,
Sort_param param 

Read from a disk file into the merge chunk's buffer.

We generally read as many complete rows as we can, except when bounded by max_keys() or rowcount(). Incomplete rows will be left in the file.

Number of bytes read, or (uint)-1 if something went wrong.

◆ save_index()

static bool save_index ( Sort_param param,
uint  count,
Filesort_info table_sort,
Sort_result sort_result 

This function is used only if the entire result set fits in memory.

For addon fields, we keep the result in the filesort buffer. This saves us a lot of memcpy calls.

For row references, we copy the final sorted result into a buffer, but we do not copy the actual sort keys, as they are no longer needed. We could have kept the result in the sort buffer here as well, but the new buffer — containing only row references — is probably a lot smaller.

The result data will be unpacked by SortBufferIterator or SortBufferIndirectIterator

Note that SortBufferIterator does not have access to a Sort_param. It does however have access to a Filesort_info, which knows whether we have variable sized keys or not. TODO: consider templatizing SortBufferIterator on is_varlen or not.

[in] param — Sort parameters.
count — Number of records.
[in,out] table_sort — Information used by SortBufferIterator / SortBufferIndirectIterator.
[out] sort_result — Where to store the actual result.

◆ sortlength()

uint sortlength ( THD thd,
st_sort_field sortorder,
uint  s_length 

Calculate length of sort key.

Declared here so we can unit test it.

thd — Thread handler.
sortorder — Order of items to sort.
s_length — Number of items to sort.
Note: sortorder->length is updated for each sort item.
Returns the total length of the sort buffer in bytes.

◆ SortWillBeOnRowId()

bool SortWillBeOnRowId ( const TABLE table)

◆ trace_filesort_information()

static void trace_filesort_information ( Opt_trace_context trace,
const st_sort_field sortorder,
uint  s_length 

◆ write_keys()

static int write_keys ( Sort_param param,
Filesort_info fs_info,
uint  count,
IO_CACHE chunk_file,
IO_CACHE tempfile 

Sort the buffer and write:

  1. the sorted sequence to tempfile
  2. a Merge_chunk describing the sorted sequence position to chunk_file
param — Sort parameters.
fs_info — Contains the buffer to be sorted and written.
count — Number of records to write.
chunk_file — One 'Merge_chunk' struct will be written into this file. The Merge_chunk::{file_pos, count} will indicate where the sorted data was stored.
tempfile — The sorted sequence will be written into this file.
0 OK
1 Error