MySQL 8.0.19
Source Code Documentation
filesort.cc File Reference
#include "sql/filesort.h"
#include <limits.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <atomic>
#include <memory>
#include <new>
#include <vector>
#include "add_with_saturate.h"
#include "binlog_config.h"
#include "decimal.h"
#include "field_types.h"
#include "m_ctype.h"
#include "map_helpers.h"
#include "my_basename.h"
#include "my_bitmap.h"
#include "my_byteorder.h"
#include "my_compiler.h"
#include "my_dbug.h"
#include "my_inttypes.h"
#include "my_loglevel.h"
#include "my_sys.h"
#include "mysql/components/services/log_builtins.h"
#include "mysql/components/services/log_shared.h"
#include "mysql/psi/mysql_file.h"
#include "mysql/service_mysql_alloc.h"
#include "mysql/udf_registration_types.h"
#include "mysql_com.h"
#include "mysqld_error.h"
#include "nullable.h"
#include "priority_queue.h"
#include "sql/auth/sql_security_ctx.h"
#include "sql/bounded_queue.h"
#include "sql/cmp_varlen_keys.h"
#include "sql/debug_sync.h"
#include "sql/derror.h"
#include "sql/error_handler.h"
#include "sql/field.h"
#include "sql/filesort_utils.h"
#include "sql/handler.h"
#include "sql/item.h"
#include "sql/item_subselect.h"
#include "sql/json_dom.h"
#include "sql/key_spec.h"
#include "sql/malloc_allocator.h"
#include "sql/merge_many_buff.h"
#include "sql/my_decimal.h"
#include "sql/mysqld.h"
#include "sql/opt_costmodel.h"
#include "sql/opt_trace.h"
#include "sql/opt_trace_context.h"
#include "sql/pfs_batch_mode.h"
#include "sql/psi_memory_key.h"
#include "sql/row_iterator.h"
#include "sql/sort_param.h"
#include "sql/sorting_iterator.h"
#include "sql/sql_array.h"
#include "sql/sql_base.h"
#include "sql/sql_bitmap.h"
#include "sql/sql_class.h"
#include "sql/sql_const.h"
#include "sql/sql_error.h"
#include "sql/sql_executor.h"
#include "sql/sql_lex.h"
#include "sql/sql_optimizer.h"
#include "sql/sql_sort.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/thr_malloc.h"
#include "sql_string.h"
#include "template_utils.h"

Classes

class  Filesort_error_handler
 Error handler for filesort.
 

Functions

static ha_rows read_all_rows (THD *thd, Sort_param *param, QEP_TAB *qep_tab, Filesort_info *fs_info, IO_CACHE *chunk_file, IO_CACHE *tempfile, Bounded_queue< uchar *, uchar *, Sort_param, Mem_compare_queue_key > *pq, RowIterator *source_iterator, ha_rows *found_rows, size_t *longest_key)
 Read all rows, and write them into a temporary file (if we run out of space in the sort buffer).
 
static int write_keys (Sort_param *param, Filesort_info *fs_info, uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile)
 
static void register_used_fields (Sort_param *param)
 
static int merge_index (THD *thd, Sort_param *param, Sort_buffer sort_buffer, Merge_chunk_array chunk_array, IO_CACHE *tempfile, IO_CACHE *outfile)
 
static bool save_index (Sort_param *param, uint count, Filesort_info *table_sort, Sort_result *sort_result)
 This function is used only if the entire result set fits in memory.
 
static bool check_if_pq_applicable (Opt_trace_context *trace, Sort_param *param, Filesort_info *filesort_info, ha_rows num_rows, ulong memory_available)
 Test whether priority queue is worth using to get top elements of an ordered result set.
 
static void trace_filesort_information (Opt_trace_context *trace, const st_sort_field *sortorder, uint s_length)
 
bool filesort (THD *thd, Filesort *filesort, RowIterator *source_iterator, Filesort_info *fs_info, Sort_result *sort_result, ha_rows *found_rows)
 Sort a table.
 
void filesort_free_buffers (TABLE *table, bool full)
 
static void dbug_print_record (TABLE *table, bool print_rowid)
 
static bool alloc_and_make_sortkey (Sort_param *param, Filesort_info *fs_info, uchar *ref_pos, size_t *key_length)
 
static void copy_native_longlong (uchar *to, size_t to_length, longlong val, bool is_unsigned)
 
static NO_INLINE uint make_json_sort_key (Item *item, uchar *to, uchar *null_indicator, size_t length, ulonglong *hash)
 Make a sort key for the JSON value in an Item.
 
static uint read_to_buffer (IO_CACHE *fromfile, Merge_chunk *merge_chunk, Sort_param *param)
 Read from a disk file into the merge chunk's buffer.
 
static int merge_buffers (THD *thd, Sort_param *param, IO_CACHE *from_file, IO_CACHE *to_file, Sort_buffer sort_buffer, Merge_chunk *last_chunk, Merge_chunk_array chunk_array, bool include_keys)
 Merge buffers to one buffer.
 
uint sortlength (THD *thd, st_sort_field *sortorder, uint s_length)
 Calculate length of sort key.
 
void change_double_for_sort (double nr, uchar *to)
 

Variables

const bool Is_big_endian = false
 

Detailed Description

Standard external sort. We read rows into a buffer until there's no more room. At that point, we sort it (using the sorting algorithms from STL), and write it to disk (thus the name “filesort”). When there are no more rows, we merge chunks recursively, seven and seven (although we can go all the way up to 15 in the final pass if it helps us do one pass less).

If all the rows fit in a single chunk, the data never hits disk, but remains in RAM.
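
The buffering-and-chunking step described above can be illustrated with a small standalone sketch. This is not MySQL code: Chunk, make_sorted_chunks, and buffer_capacity are made-up names, and the real implementation spills each sorted run to a temporary file rather than keeping it in memory.

  // Illustrative sketch only: fill a bounded buffer, sort it with std::sort,
  // and spill it as one sorted run ("chunk").
  #include <algorithm>
  #include <cstddef>
  #include <iostream>
  #include <vector>

  using Chunk = std::vector<int>;  // One sorted run.

  static std::vector<Chunk> make_sorted_chunks(const std::vector<int> &rows,
                                               std::size_t buffer_capacity) {
    std::vector<Chunk> chunks;
    Chunk buffer;
    for (int row : rows) {
      buffer.push_back(row);
      if (buffer.size() == buffer_capacity) {   // Buffer full: sort and spill.
        std::sort(buffer.begin(), buffer.end());
        chunks.push_back(std::move(buffer));
        buffer.clear();
      }
    }
    if (!buffer.empty()) {                      // Final, possibly partial, run.
      std::sort(buffer.begin(), buffer.end());
      chunks.push_back(std::move(buffer));
    }
    return chunks;
  }

  int main() {
    const std::vector<int> rows = {9, 3, 7, 1, 8, 2, 6, 5, 4};
    for (const Chunk &chunk : make_sorted_chunks(rows, 3)) {
      for (int key : chunk) std::cout << key << ' ';
      std::cout << '\n';  // Each line is one sorted run, ready for merging.
    }
  }

The merge step itself is sketched under merge_buffers() below.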

Function Documentation

◆ alloc_and_make_sortkey()

static bool alloc_and_make_sortkey ( Sort_param *param,
Filesort_info *fs_info,
uchar *ref_pos,
size_t *key_length 
)
static

◆ change_double_for_sort()

void change_double_for_sort ( double nr,
uchar *to 
)

◆ check_if_pq_applicable()

bool check_if_pq_applicable ( Opt_trace_context *trace,
Sort_param *param,
Filesort_info *filesort_info,
ha_rows num_rows,
ulong memory_available 
)
static

Test whether priority queue is worth using to get top elements of an ordered result set.

If it is, then allocates buffer for required amount of records

Parameters
trace   Current trace context.
param   Sort parameters.
filesort_info   Filesort information.
num_rows   Estimate of number of rows in source record set.
memory_available   Memory available for sorting.

DESCRIPTION
Given a query like this:
  SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows;
this function tests whether a priority queue can be used to keep the result (i.e., there is enough memory to store <max_rows> rows).

Returns
true - if it is OK to use a PQ
false - if a PQ should not be used, or there is not enough memory.
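
As a rough illustration of the kind of memory check involved, the sketch below assumes fixed-size rows; pq_fits_in_memory, row_length, and the arithmetic are illustrative, not the actual decision logic (which is more elaborate).

  // Illustrative sketch: can the top 'max_rows' rows be kept in a bounded
  // priority queue within 'memory_available' bytes?
  #include <cstddef>
  #include <cstdint>

  static bool pq_fits_in_memory(uint64_t max_rows, std::size_t row_length,
                                std::size_t memory_available) {
    if (max_rows == 0) return false;           // No LIMIT: a PQ gives no benefit.
    const uint64_t queue_rows = max_rows + 1;  // One extra slot for the candidate row.
    const uint64_t needed = queue_rows * static_cast<uint64_t>(row_length);
    return needed <= memory_available;
  }

For example, with max_rows = 100 and 64-byte rows, the queue would need 101 * 64 = 6,464 bytes of sort memory.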

◆ copy_native_longlong()

static void copy_native_longlong ( uchar *to,
size_t to_length,
longlong val,
bool is_unsigned 
)
static

◆ dbug_print_record()

static void dbug_print_record ( TABLE *table,
bool print_rowid 
)
static

◆ filesort()

bool filesort ( THD *thd,
Filesort *filesort,
RowIterator *source_iterator,
Filesort_info *fs_info,
Sort_result *sort_result,
ha_rows *found_rows 
)

Sort a table.

Creates a set of pointers that can be used to read the rows in sorted order. This should be done with the functions in records.cc.

The result set is stored in fs_info->io_cache or fs_info->sorted_result, or left in the main filesort buffer.

Parameters
thd   Current thread
filesort   How to sort the table
source_iterator   Where to read the rows to be sorted from.
fs_info   Owns the buffers for sort_result.
sort_result   Where to store the sort result.
[out] found_rows   Store the number of found rows here, i.e. the number of rows found after applying the WHERE condition.
Note
If we sort by position (e.g. if sort_positions is 1), filesort() will call table->prepare_for_position().
Returns
false on success, true on error
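
The idea of producing a set of pointers that lets the caller read rows in sorted order can be shown with a small standalone sketch; Row, refs, and the comparison below are hypothetical stand-ins, not the actual MySQL row or key format.

  // Illustrative sketch: sort row references, not rows, then read through them.
  #include <algorithm>
  #include <cstddef>
  #include <iostream>
  #include <string>
  #include <vector>

  struct Row {
    int key;
    std::string payload;
  };

  int main() {
    std::vector<Row> table = {{3, "c"}, {1, "a"}, {2, "b"}};

    // Build "row references" (indices into the table) and sort only those.
    std::vector<std::size_t> refs(table.size());
    for (std::size_t i = 0; i < refs.size(); ++i) refs[i] = i;
    std::sort(refs.begin(), refs.end(),
              [&](std::size_t a, std::size_t b) { return table[a].key < table[b].key; });

    // Read the rows back in sorted order through the references.
    for (std::size_t r : refs)
      std::cout << table[r].key << ' ' << table[r].payload << '\n';
  }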

◆ filesort_free_buffers()

void filesort_free_buffers ( TABLE *table,
bool full 
)

◆ make_json_sort_key()

static NO_INLINE uint make_json_sort_key ( Item *item,
uchar *to,
uchar *null_indicator,
size_t length,
ulonglong *hash 
)
static

Make a sort key for the JSON value in an Item.

This function is called by Sort_param::make_sortkey(). We don't want it to be inlined, since that seemed to have a negative impact on some performance tests.

Parameters
[in] item   The item for which to create a sort key.
[out] to   Pointer into the buffer to which the sort key should be written. It will point to where the data portion of the key should start.
[out] null_indicator   For nullable items, the NULL indicator byte. (Ignored otherwise.) Should be initialized by the caller to a value that indicates not NULL.
[in] length   The length of the sort key, not including the NULL indicator byte at the beginning of the sort key for nullable items.
[in,out] hash   The hash key of the JSON values in the current row.
Returns
length of the key stored
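
The NULL-indicator-plus-data-portion layout that these parameters describe can be sketched for a simple fixed-width value; the layout and make_key below are made up for a 32-bit integer and are not the actual JSON sort-key format.

  // Illustrative sketch: a memcmp-comparable key with a leading NULL indicator.
  #include <cstddef>
  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Write a nullable unsigned 32-bit value as [null byte][big-endian data].
  // A NULL sorts before all non-NULL values because its indicator byte is smaller.
  static std::size_t make_key(unsigned char *to, bool is_null, uint32_t value) {
    to[0] = is_null ? 0 : 1;                 // NULL indicator byte.
    if (is_null) {
      std::memset(to + 1, 0, 4);             // Data portion is irrelevant for NULL.
    } else {
      to[1] = static_cast<unsigned char>(value >> 24);  // Big-endian so that
      to[2] = static_cast<unsigned char>(value >> 16);  // memcmp() order matches
      to[3] = static_cast<unsigned char>(value >> 8);   // numeric order.
      to[4] = static_cast<unsigned char>(value);
    }
    return 5;  // Total key length: indicator + 4 data bytes.
  }

  int main() {
    unsigned char a[5], b[5];
    make_key(a, false, 42);
    make_key(b, false, 1000);
    std::printf("%d\n", std::memcmp(a, b, 5) < 0);  // Prints 1: 42 sorts before 1000.
  }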

◆ merge_buffers()

static int merge_buffers ( THD *thd,
Sort_param *param,
IO_CACHE *from_file,
IO_CACHE *to_file,
Sort_buffer sort_buffer,
Merge_chunk *last_chunk,
Merge_chunk_array chunk_array,
bool include_keys 
)
static

Merge buffers to one buffer.

Parameters
thd   Thread handle.
param   Sort parameter.
from_file   File with source data (Merge_chunks point to this file).
to_file   File to write the sorted result data to.
sort_buffer   Buffer for data to store up to MERGEBUFF2 sort keys.
[out] last_chunk   Store here the Merge_chunk describing the data written to to_file.
chunk_array   Array of chunks to merge.
include_keys   If true, write both the keys and the addons / row positions. If false, the keys will be skipped (useful only for the output of the final merge, where we don't need to compare rows further).
Returns
0 OK
other error
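
A standalone sketch of the underlying k-way merge follows. The runs are plain vectors here instead of Merge_chunks backed by an IO_CACHE, and merge_runs and Entry are illustrative names, not the actual implementation.

  // Illustrative sketch: merge several sorted runs with a min-heap.
  #include <cstddef>
  #include <iostream>
  #include <queue>
  #include <vector>

  static std::vector<int> merge_runs(const std::vector<std::vector<int>> &runs) {
    using Entry = std::pair<int, std::size_t>;  // (current value, run index)
    auto greater = [](const Entry &a, const Entry &b) { return a.first > b.first; };
    std::priority_queue<Entry, std::vector<Entry>, decltype(greater)> heap(greater);

    std::vector<std::size_t> pos(runs.size(), 0);
    for (std::size_t i = 0; i < runs.size(); ++i)
      if (!runs[i].empty()) heap.push({runs[i][0], i});

    std::vector<int> out;
    while (!heap.empty()) {
      auto [value, run] = heap.top();
      heap.pop();
      out.push_back(value);                      // Emit the smallest head element.
      if (++pos[run] < runs[run].size())         // Refill from the same run.
        heap.push({runs[run][pos[run]], run});
    }
    return out;
  }

  int main() {
    const std::vector<std::vector<int>> runs = {{1, 4, 7}, {2, 5, 8}, {3, 6, 9}};
    for (int v : merge_runs(runs)) std::cout << v << ' ';
    std::cout << '\n';
  }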

◆ merge_index()

static int merge_index ( THD *thd,
Sort_param *param,
Sort_buffer sort_buffer,
Merge_chunk_array chunk_array,
IO_CACHE *tempfile,
IO_CACHE *outfile 
)
static

◆ read_all_rows()

static ha_rows read_all_rows ( THD *thd,
Sort_param *param,
QEP_TAB *qep_tab,
Filesort_info *fs_info,
IO_CACHE *chunk_file,
IO_CACHE *tempfile,
Bounded_queue< uchar *, uchar *, Sort_param, Mem_compare_queue_key > *pq,
RowIterator *source_iterator,
ha_rows *found_rows,
size_t *longest_key 
)
static

Read all rows, and write them into a temporary file (if we run out of space in the sort buffer).

All produced sequences are guaranteed to be non-empty.

Parameters
thd   Thread handle
param   Sorting parameter
qep_tab   Parameters for which data to read (see source_iterator).
fs_info   Struct containing sort buffer etc.
chunk_file   File to write Merge_chunks describing sorted segments in tempfile.
tempfile   File to write sorted sequences of sortkeys to.
pq   If !NULL, use it for keeping top N elements
[out] found_rows   The number of FOUND_ROWS(). For a query with LIMIT, this value will typically be larger than the function return value.
[out] longest_key   The largest single key found, in bytes.
Note
Basic idea:
   while (get_next_sortkey())
   {
     if (using priority queue)
       push sort key into queue
     else
     {
       try to put sort key into buffer;
       if (no free space in sort buffer)
       {
         do {
           allocate new, larger buffer;
           retry putting sort key into buffer;
         } until (record fits or no space for new buffer)
         if (no space for new buffer)
         {
           sort record pointers (all buffers);
           dump sorted sequence to 'tempfile';
           dump Merge_chunk describing sequence location into 'chunk_file';
         }
       }
       if (key was packed)
         tell sort buffer the actual number of bytes used;
     }
   }
   if (buffer has some elements && dumped at least once)
     sort-dump-dump as above;
   else
     don't sort, leave sort buffer to be sorted by caller.
Returns
Number of records written on success.
HA_POS_ERROR on error.

◆ read_to_buffer()

static uint read_to_buffer ( IO_CACHE *fromfile,
Merge_chunk *merge_chunk,
Sort_param *param 
)
static

Read from a disk file into the merge chunk's buffer.

We generally read as many complete rows as we can, except when bounded by max_keys() or rowcount(). Incomplete rows will be left in the file.

Returns
Number of bytes read, or (uint)-1 if something went wrong.

◆ register_used_fields()

static void register_used_fields ( Sort_param *param )
static

◆ save_index()

static bool save_index ( Sort_param *param,
uint count,
Filesort_info *table_sort,
Sort_result *sort_result 
)
static

This function is used only if the entire result set fits in memory.

For addon fields, we keep the result in the filesort buffer. This saves us a lot of memcpy calls.

For row references, we copy the final sorted result into a buffer, but we do not copy the actual sort-keys, as they are no longer needed. We could have kept the result in the sort buffer here as well, but the new buffer - containing only row references - is probably a lot smaller.

The result data will be unpacked by SortBufferIterator or SortBufferIndirectIterator.

Note that SortBufferIterator does not have access to a Sort_param. It does however have access to a Filesort_info, which knows whether we have variable sized keys or not. TODO: consider templatizing SortBufferIterator on is_varlen or not.

Parameters
[in] param   Sort parameters.
count   Number of records
[in,out] table_sort   Information used by SortBufferIterator / SortBufferIndirectIterator
[out] sort_result   Where to store the actual result
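
The row-reference case (copy the references, drop the keys) can be sketched as follows; pack_row_ids, key_length, and rowid_length are made-up names, and the real code works with Filesort_info buffers rather than raw vectors.

  // Illustrative sketch: copy only the row-id portion of each sorted record
  // into a new, tightly packed buffer, discarding the sort keys.
  #include <cstddef>
  #include <cstring>
  #include <vector>

  static std::vector<unsigned char> pack_row_ids(
      const std::vector<const unsigned char *> &sorted_records,
      std::size_t key_length, std::size_t rowid_length) {
    std::vector<unsigned char> packed(sorted_records.size() * rowid_length);
    unsigned char *dst = packed.data();
    for (const unsigned char *rec : sorted_records) {
      // The row id follows the sort key in each record; the key is no longer needed.
      std::memcpy(dst, rec + key_length, rowid_length);
      dst += rowid_length;
    }
    return packed;
  }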

◆ sortlength()

uint sortlength ( THD *thd,
st_sort_field *sortorder,
uint s_length 
)

Calculate length of sort key.

Declared here so we can unit test it.

Parameters
thd   Thread handler
sortorder   Order of items to sort
s_length   Number of items to sort
Note
sortorder->length is updated for each sort item.
Returns
Total length of sort buffer in bytes
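
A minimal sketch of the kind of accumulation performed here is shown below; SortField is a made-up stand-in for st_sort_field, and the real function also handles character sets, packed and variable-length keys, and updates sortorder->length in place.

  // Illustrative sketch: total sort-key length is the sum of per-item lengths,
  // plus one NULL indicator byte for each nullable item.
  #include <cstddef>
  #include <vector>

  struct SortField {
    std::size_t length;  // Key bytes contributed by this item.
    bool maybe_null;     // Nullable items need an extra indicator byte.
  };

  static std::size_t total_sort_length(const std::vector<SortField> &order) {
    std::size_t total = 0;
    for (const SortField &sf : order)
      total += sf.length + (sf.maybe_null ? 1 : 0);
    return total;
  }

For example, two 4-byte items where only the second is nullable would give 4 + 5 = 9 bytes per key.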

◆ trace_filesort_information()

static void trace_filesort_information ( Opt_trace_context *trace,
const st_sort_field *sortorder,
uint s_length 
)
static

◆ write_keys()

static int write_keys ( Sort_param *param,
Filesort_info *fs_info,
uint count,
IO_CACHE *chunk_file,
IO_CACHE *tempfile 
)
static

Sort the buffer and write:

  1. the sorted sequence to tempfile
  2. a Merge_chunk describing the sorted sequence position to chunk_file
Parameters
param   Sort parameters
fs_info   Contains the buffer to be sorted and written.
count   Number of records to write.
chunk_file   One 'Merge_chunk' struct will be written into this file. The Merge_chunk::{file_pos, count} will indicate where the sorted data was stored.
tempfile   The sorted sequence will be written into this file.
Returns
0 OK
1 Error
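
The sort-and-spill step can be sketched in isolation; ChunkDescriptor stands in for Merge_chunk, a vector stands in for the IO_CACHE temp file, and all names are illustrative rather than the actual implementation.

  // Illustrative sketch: sort the buffered keys, append them to the "temp
  // file", and record where the run starts and how many records it holds.
  #include <algorithm>
  #include <cstddef>
  #include <vector>

  struct ChunkDescriptor {
    std::size_t file_pos;  // Offset of the run in the temp file.
    std::size_t count;     // Number of records in the run.
  };

  static ChunkDescriptor write_sorted_run(std::vector<int> &buffer,
                                          std::vector<int> &tempfile) {
    std::sort(buffer.begin(), buffer.end());         // Sort the in-memory buffer.
    ChunkDescriptor chunk{tempfile.size(), buffer.size()};
    tempfile.insert(tempfile.end(), buffer.begin(), buffer.end());  // Append run.
    buffer.clear();                                   // Buffer is free for the next run.
    return chunk;                                     // Plays the role of the entry written to chunk_file.
  }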

Variable Documentation

◆ Is_big_endian

const bool Is_big_endian = false