MySQL 9.0.0
Source Code Documentation
|
Temporary tables implementation. More...
#include "sql/sql_tmp_table.h"
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <algorithm>
#include <cstring>
#include <memory>
#include <new>
#include <utility>
#include <vector>
#include "field_types.h"
#include "lex_string.h"
#include "m_string.h"
#include "my_alloc.h"
#include "my_bitmap.h"
#include "my_compiler.h"
#include "my_dbug.h"
#include "my_pointer_arithmetic.h"
#include "my_sys.h"
#include "mysql/plugin.h"
#include "mysql/strings/m_ctype.h"
#include "mysql/udf_registration_types.h"
#include "mysql_com.h"
#include "mysqld_error.h"
#include "nulls.h"
#include "scope_guard.h"
#include "sql/create_field.h"
#include "sql/current_thd.h"
#include "sql/dd/types/column.h"
#include "sql/debug_sync.h"
#include "sql/field.h"
#include "sql/filesort.h"
#include "sql/handler.h"
#include "sql/item_func.h"
#include "sql/item_sum.h"
#include "sql/key.h"
#include "sql/mem_root_allocator.h"
#include "sql/mem_root_array.h"
#include "sql/mysqld.h"
#include "sql/opt_trace.h"
#include "sql/opt_trace_context.h"
#include "sql/psi_memory_key.h"
#include "sql/query_options.h"
#include "sql/range_optimizer/range_optimizer.h"
#include "sql/sql_base.h"
#include "sql/sql_class.h"
#include "sql/sql_const.h"
#include "sql/sql_executor.h"
#include "sql/sql_lex.h"
#include "sql/sql_list.h"
#include "sql/sql_opt_exec_shared.h"
#include "sql/sql_optimizer.h"
#include "sql/sql_plugin.h"
#include "sql/sql_plugin_ref.h"
#include "sql/sql_select.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/temp_table_param.h"
#include "sql/thd_raii.h"
#include "sql/thr_malloc.h"
#include "sql/window.h"
#include "template_utils.h"
Classes | |
class | Cache_temp_engine_properties |
Cache for the storage engine properties for the alternative temporary table storage engines. More... | |
Macros | |
#define | STRING_TOTAL_LENGTH_TO_PACK_ROWS 128 |
Create a temp table according to a field list. More... | |
#define | AVG_STRING_LENGTH_TO_PACK_ROWS 64 |
#define | RATIO_TO_PACK_ROWS 2 |
Functions | |
static bool | alloc_record_buffers (THD *thd, TABLE *table) |
Helper function for create_tmp_table_* family for allocating record buffers. More... | |
Field * | create_tmp_field_from_field (THD *thd, const Field *org_field, const char *name, TABLE *table, Item_field *item) |
Lifecycle management of internal temporary tables. More... | |
static Field * | create_tmp_field_from_item (Item *item, TABLE *table) |
Create field for temporary table using type of given item. More... | |
static Field * | create_tmp_field_for_schema (const Item *item, TABLE *table) |
Create field for information schema table. More... | |
Field * | create_tmp_field (THD *thd, TABLE *table, Item *item, Item::Type type, Func_ptr_array *copy_func, Field **from_field, Field **default_field, bool group, bool modify_item, bool table_cant_handle_bit_fields, bool make_copy_field) |
Create field for temporary table. More... | |
static void | setup_tmp_table_column_bitmaps (TABLE *table, uchar *bitmaps) |
void | init_cache_tmp_engine_properties () |
Initialize the storage engine properties for the alternative temporary table storage engines. More... | |
void | get_max_key_and_part_length (uint *max_key_length, uint *max_key_part_length, uint *max_key_parts) |
Get the minimum of max_key_length/part_length/parts. More... | |
static const char * | create_tmp_table_field_tmp_name (THD *thd, Item *item) |
Create a temporary name for one field if the field_name is empty. More... | |
static void | register_hidden_field (TABLE *table, Field **default_field, Field **from_field, uint *blob_field, Field *field) |
Helper function for create_tmp_table(). More... | |
static void | set_real_row_type (TABLE *table) |
Helper function which evaluates correct TABLE_SHARE::real_row_type for the temporary table. More... | |
static void | sort_copy_func (const Query_block *query_block, Func_ptr_array *copy_func) |
Moves to the end of the 'copy_func' array the elements which contain a reference to an expression of the SELECT list of 'query_block'. More... | |
void | relocate_field (Field *field, uchar *pos, uchar *null_flags, uint *null_count) |
Helper function for create_tmp_table_* family for setting tmp table fields to their place in record buffer. More... | |
TABLE * | create_tmp_table (THD *thd, Temp_table_param *param, const mem_root_deque< Item * > &fields, ORDER *group, bool distinct, bool save_sum_fields, ulonglong select_options, ha_rows rows_limit, const char *table_alias) |
TABLE * | create_duplicate_weedout_tmp_table (THD *thd, uint uniq_tuple_length_arg, SJ_TMP_TABLE *sjtbl) |
Create a temporary table to weed out duplicate rowid combinations. More... | |
TABLE * | create_tmp_table_from_fields (THD *thd, List< Create_field > &field_list, bool is_virtual, ulonglong select_options, const char *alias) |
Create an, optionally reduced, TABLE object with properly set up Field list from a list of field definitions. More... | |
static bool | use_tmp_disk_storage_engine (THD *thd, TABLE *table, ulonglong select_options, bool force_disk_table, enum_internal_tmp_mem_storage_engine mem_engine) |
Checks if disk storage engine should be used for temporary table. More... | |
bool | setup_tmp_table_handler (THD *thd, TABLE *table, ulonglong select_options, bool force_disk_table, bool schema_table) |
Helper function to create_tmp_table_* family for setting up table's SE. More... | |
bool | open_tmp_table (TABLE *table) |
static bool | create_tmp_table_with_fallback (THD *thd, TABLE *table) |
Try to create an in-memory temporary table and if not enough space, then try to create an on-disk one. More... | |
static void | trace_tmp_table (Opt_trace_context *trace, const TABLE *table) |
bool | instantiate_tmp_table (THD *thd, TABLE *table) |
Instantiates temporary table. More... | |
void | close_tmp_table (TABLE *table) |
Close a temporary table at end of preparation or execution. More... | |
void | free_tmp_table (TABLE *table) |
Free temporary table. More... | |
bool | create_ondisk_from_heap (THD *thd, TABLE *wtable, int error, bool insert_last_record, bool ignore_last_dup, bool *is_duplicate) |
If a MEMORY table gets full, create a disk-based table and copy all rows to this. More... | |
void | encode_innodb_position (uchar *rowid_bytes, uint length, ha_rows row_num) |
Encode an InnoDB PK in 6 bytes, high-byte first; like InnoDB's dict_sys_write_row_id() does. More... | |
bool | reposition_innodb_cursor (TABLE *table, ha_rows row_num) |
Helper function for create_ondisk_from_heap(). More... | |
static int | FindCopyBitmap (Item *item) |
Temporary tables implementation.
#define AVG_STRING_LENGTH_TO_PACK_ROWS 64 |
#define RATIO_TO_PACK_ROWS 2 |
#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128 |
Create a temp table according to a field list.
Given field pointers are changed to point at tmp_table for send_result_set_metadata. The table object is self contained: it's allocated in its own memory root, as well as Field objects created for table columns. Those Field objects are common to TABLE and TABLE_SHARE. This function will replace Item_sum items in 'fields' list with corresponding Item_field items, pointing at the fields in the temporary table, unless save_sum_fields is set to false. The Item_field objects are created in THD memory root.
thd | thread handle |
param | a description used as input to create the table |
fields | list of items that will be used to define column types of the table (also see NOTES) |
group | Group key to use for temporary table, NULL if none |
distinct | should table rows be distinct |
save_sum_fields | see NOTES |
select_options | |
rows_limit | |
table_alias | possible name of the temporary table that can be used for name resolving; can be "". |
Helper function for create_tmp_table_* family for allocating record buffers.
thd | thread handler |
table | table to allocate record buffers for |
void close_tmp_table | ( | TABLE * | table | ) |
Close a temporary table at end of preparation or execution.
Any buffers associated with the table will be released. When tmp_open_count reaches zero, the following will happen:
table | Table reference |
TABLE * create_duplicate_weedout_tmp_table | ( | THD * | thd, |
uint | uniq_tuple_length_arg, | ||
SJ_TMP_TABLE * | sjtbl | ||
) |
Create a temporary table to weed out duplicate rowid combinations.
thd | Thread handle |
uniq_tuple_length_arg | Length of the table's column |
sjtbl | Update sjtbl->[start_]recinfo values which will be needed if we'll need to convert the created temptable from HEAP to MyISAM/Maria. |
create_duplicate_weedout_tmp_table()
Create a temporary table to weed out duplicate rowid combinations. The table has a single column that is a concatenation of all rowids in the combination.
Depending on the needed length, there are two cases:
When the length of the column < max_key_length:
CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col));
Otherwise (not a valid SQL syntax but internally supported):
CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col));
The code in this function was produced by extraction of relevant parts from create_tmp_table().
bool create_ondisk_from_heap | ( | THD * | thd, |
TABLE * | wtable, | ||
int | error, | ||
bool | insert_last_record, | ||
bool | ignore_last_dup, | ||
bool * | is_duplicate | ||
) |
If a MEMORY table gets full, create a disk-based table and copy all rows to this.
[in] | thd | THD reference |
[in] | wtable | Table reference being written to |
[in] | error | Reason why inserting into MEMORY table failed. |
[in] | insert_last_record | If true, the last record (table->record[0]) is inserted into the newly created table after copying all the records from the temp table. If false, the last record is not inserted and the parameters ignore_last_dup and is_duplicate are ignored. |
[in] | ignore_last_dup | If true, ignore duplicate key error for last inserted key (see detailed description below). |
[out] | is_duplicate | If non-NULL and ignore_last_dup is true, return true if last key was a duplicate, and false otherwise. |
Function can be called with any error code, but only HA_ERR_RECORD_FILE_FULL will be handled, all other errors cause a fatal error to be thrown. The function creates a disk-based temporary table, copies all records from the MEMORY table into this new table, deletes the old table and switches to use the new table within the table handle. The function uses table->record[1] as a temporary buffer while copying.
If the parameter insert_last_record is true, this function assumes that table->record[0] contains the row that caused the error when inserting into the MEMORY table (the "last row"). After all existing rows have been copied to the new table, the last row is attempted to be inserted as well. If ignore_last_dup is true, this row can be a duplicate of an existing row without throwing an error. If is_duplicate is non-NULL, an indication of whether the last row was a duplicate is returned.
If the parameter insert_last_record is false, this function makes no assumptions on the operation and will not try an insert of the last record (table->record[0]). The caller is expected to handle the operation after moving to disk.
If 'wtable' has other TABLE clones (example: a multi-referenced or a recursive CTE), we convert all clones; if an error happens during conversion of clone B after successfully converting clone A, clone A and B will exit from the function with a TABLE_SHARE corresponding to the pre-conversion table ("old" TABLE_SHARE). So A will be inconsistent (for example s->db_type() will say "MEMORY" while A->file will be a disk-based engine). However, as all callers bail out, it is reasonable to think that they won't be using the TABLE_SHARE except in free_tmp_table(); and free_tmp_table() only uses properties of TABLE_SHARE which are common to the old and new object (reference counts, MEM_ROOT), so that should work. Solutions to fix this cleanly:
Field * create_tmp_field | ( | THD * | thd, |
TABLE * | table, | ||
Item * | item, | ||
Item::Type | type, | ||
Func_ptr_array * | copy_func, | ||
Field ** | from_field, | ||
Field ** | default_field, | ||
bool | group, | ||
bool | modify_item, | ||
bool | table_cant_handle_bit_fields, | ||
bool | make_copy_field | ||
) |
Create field for temporary table.
thd | Thread handler |
table | Temporary table |
item | Item to create a field for |
type | Type of item (normally item->type) |
copy_func | If set and item is a function, store copy of item in this array |
from_field | if the field will be created using another field as an example, a pointer to the example field will be written here |
default_field | If field has a default value field, store it here |
group | 1 if we are going to do a relative group by on result |
modify_item | 1 if item->result_field should point to new item. This is relevant for how fill_record() is going to work: If modify_item is 1 then fill_record() will update the record in the original table. If modify_item is 0 then fill_record() will update the temporary table |
table_cant_handle_bit_fields | if table can't handle bit-fields and bit-fields shall be converted to long |
make_copy_field | if true, a pointer of the result field should be stored in from_field, otherwise the item should be wrapped in Func_ptr and stored in copy_func |
NULL | On error. |
new_created | field |
Create field for information schema table.
table | Temporary table |
item | Item to create a field for |
0 | on error |
new_created | field |
Field * create_tmp_field_from_field | ( | THD * | thd, |
const Field * | org_field, | ||
const char * | name, | ||
TABLE * | table, | ||
Item_field * | item | ||
) |
Lifecycle management of internal temporary tables.
An internal temporary table is represented by a TABLE_SHARE object.
The interface to an internal temporary table is through one or more TABLE objects, of which at most one TABLE object is a writer object; the remaining TABLE objects are reader objects. Each TABLE object points to the TABLE_SHARE. TABLE_SHARE::ref_count counts the number of TABLE objects that point to it.
The TABLE, TABLE_SHARE and associated objects (e.g. Field objects) are created in a dedicated mem_root. This mem_root is deleted when the TABLE_SHARE object is deleted.
Initially, an internal temporary table is created with one TABLE_SHARE object and one TABLE object. The table is created with no file handler (storage engine) and in the "deleted" state. Later, more TABLE objects may be created against the table, and TABLE_SHARE::ref_count is increased.
An internal temporary table may be instantiated and used multiple times, typically once per execution of a statement.
To instantiate a table, call instantiate_tmp_table(). This function will first assign and lock a storage engine using setup_tmp_table_handler(). The locked engine is assigned to TABLE_SHARE::db_plugin and the file handler is assigned to TABLE::file. After this, calling TABLE::has_storage_handler() reports true.
After this, the table contents are created by calling TABLE::file->create() and the table is opened by calling open_tmp_table(), which itself calls TABLE::file->ha_open(), and sets the TABLE::created flag.
Thus, opening a temporary table is a two-stage operation:
Since a temporary table may be in any of the two stages, we use two counter members in the TABLE_SHARE to count the number of TABLEs in each of the stages: tmp_handler_count and tmp_open_count. tmp_handler_count is incremented in setup_tmp_table_handler(). tmp_open_count is incremented in open_tmp_table().
To open an already instantiated table, assign a storage handler by calling setup_tmp_table_handler(), then call open_tmp_table() which will again increment TABLE_SHARE::tmp_open_count and set TABLE::created.
Insert, update, delete and read rows using the active TABLE handlers.
After use, close all active TABLE handlers by calling close_tmp_table(). For simplicity, we may also call close_tmp_table() on a non-active TABLE, as it will check whether a storage handler has been assigned.
If the table is created, TABLE_SHARE::tmp_open_count is decremented. If there are no remaining active TABLE objects, delete the table contents by calling TABLE::file->ha_drop_table(), otherwise close it by calling TABLE::file->ha_close(). Set status of the TABLE to deleted and delete the storage handler. If there are no remaining active tables and the storage engine is still locked, unlock the plugin and disassociate it from the TABLE_SHARE object, and decrement TABLE_SHARE::tmp_handler_count.
After the final instantiation of an internal temporary table, call free_tmp_table() for all associated TABLE objects.
free_tmp_table() can only be called on a non-instantiated temporary table (but handlers may be assigned for other TABLE objects to the same table). It will decrement TABLE_SHARE::ref_count and the final call will also remove the temporary table's mem_root object. Create field for temporary table from given field.
thd | Thread handler |
org_field | Field from which new field will be created |
name | New field name |
table | Temporary table |
item | If item != NULL then fill_record() will update the record in the original table. If item == NULL then fill_record() will update the temporary table |
NULL | on error |
new_created | field |
Create field for temporary table using type of given item.
item | Item to create a field for |
table | Temporary table |
0 | on error |
new_created | field |
TABLE * create_tmp_table | ( | THD * | thd, |
Temp_table_param * | param, | ||
const mem_root_deque< Item * > & | fields, | ||
ORDER * | group, | ||
bool | distinct, | ||
bool | save_sum_fields, | ||
ulonglong | select_options, | ||
ha_rows | rows_limit, | ||
const char * | table_alias | ||
) |
When true, enforces unique constraint (by adding a hidden hash field and creating a key over this field) when: (1) unique key is too long, or (2) number of key parts in distinct key is too big, or (3) the caller has requested it, or (4) we have INTERSECT or EXCEPT, i.e. not UNION.
Create a temporary name for one field if the field_name is empty.
thd | Thread handle |
item | Item to name the field after |
TABLE * create_tmp_table_from_fields | ( | THD * | thd, |
List< Create_field > & | field_list, | ||
bool | is_virtual, | ||
ulonglong | select_options, | ||
const char * | alias | ||
) |
Create an, optionally reduced, TABLE object with properly set up Field list from a list of field definitions.
When is_virtual arg is true: The created table doesn't have a table handler associated with it, has no keys, no group/distinct, no copy_funcs array. The sole purpose of this TABLE object is to use the power of Field class to read/write data to/from table->record[0]. Then one can store the record in any container (RB tree, hash, etc.). The table is created in THD mem_root, and so are the table's fields. Consequently, if you don't have BLOB fields, you don't need to free it. When is_virtual is false: This function creates a normal tmp table out of fields' definitions, rather than from a list of items. This is the main difference from create_tmp_table. Also, the table created here doesn't do grouping and doesn't have indexes or copy_funcs/fields. The purpose is to be able to create a result table for table functions out of fields' definitions without the need for an intermediate list of items.
thd | connection handle |
field_list | list of column definitions |
is_virtual | if true, then it's effectively only a record buffer with wrapper, used e.g. to store vars in SP; if false, then a normal table, which can hold records, is created |
select_options | options for non-virtual tmp table |
alias | table's alias |
Try to create an in-memory temporary table and if not enough space, then try to create an on-disk one.
Create a temporary table according to passed description.
The passed array of MI_COLUMNDEF structures must have this form:
This function may use the free element to create hash column for unique constraint.
thd | Thread handler | |
[in,out] | table | Table object that describes the table to be created |
false | OK |
true | Error |
Encode an InnoDB PK in 6 bytes, high-byte first; like InnoDB's dict_sys_write_row_id() does.
rowid_bytes | where to store the result |
length | how many available bytes in rowid_bytes |
row_num | PK to encode |
|
static |
void free_tmp_table | ( | TABLE * | table | ) |
Free temporary table.
When ref_count reaches zero, the table's mem_root allocator is deleted.
table | Table reference |
void get_max_key_and_part_length | ( | uint * | max_key_length, |
uint * | max_key_part_length, | ||
uint * | max_key_parts | ||
) |
Get the minimum of max_key_length/part_length/parts.
Get the minimum of max_key_length and max_key_part_length between HEAP engine and internal_tmp_disk_storage_engine.
The minimum is between HEAP engine and internal_tmp_disk_storage_engine.
[out] | max_key_length | Minimum of max_key_length |
[out] | max_key_part_length | Minimum of max_key_part_length |
[out] | max_key_parts | Minimum of max_key_parts |
void init_cache_tmp_engine_properties | ( | ) |
Initialize the storage engine properties for the alternative temporary table storage engines.
Instantiates temporary table.
thd | Thread handler |
table | Table object that describes the table to be instantiated. Creates temporary table and opens it. |
bool open_tmp_table | ( | TABLE * | table | ) |
|
static |
Helper function for create_tmp_table().
Insert a field at the head of the hidden field area.
table | Temporary table |
default_field | Default value array pointer |
from_field | Original field array pointer |
blob_field | Array pointer to record fields index of blob type |
field | The registered hidden field |
Helper function for create_tmp_table_* family for setting tmp table fields to their place in record buffer.
field | field to set |
pos | field's position in table's record buffer |
null_flags | beginning of table's null bits buffer |
null_count | field's null bit in null bits buffer |
Helper function for create_ondisk_from_heap().
Our InnoDB on-disk intrinsic table uses an autogenerated auto-incrementing primary key:
table | table read by cursor |
row_num | function should position on the row_num'th row in insertion order. |
|
static |
Helper function which evaluates correct TABLE_SHARE::real_row_type for the temporary table.
bool setup_tmp_table_handler | ( | THD * | thd, |
TABLE * | table, | ||
ulonglong | select_options, | ||
bool | force_disk_table, | ||
bool | schema_table | ||
) |
Helper function to create_tmp_table_* family for setting up table's SE.
thd | Thread handler |
table | table to allocate SE for |
select_options | Options that may control storage engine selection |
force_disk_table | true <=> Use InnoDB |
schema_table | whether the table is a schema table |
|
static |
Moves to the end of the 'copy_func' array the elements which contain a reference to an expression of the SELECT list of 'query_block'.
query_block | query block to search in | |
[in,out] | copy_func | array to sort |
|
static |
|
static |
Checks if disk storage engine should be used for temporary table.
thd | thread handler |
table | table to allocate SE for |
select_options | current select's options |
force_disk_table | true <=> Use InnoDB |
mem_engine | Selected in-memory storage engine. |