MySQL 9.0.1
Source Code Documentation
|
Implements functions in the handler interface that are shared between all storage engines. More...
#include "sql/handler.h"
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <atomic>
#include <bit>
#include <cmath>
#include <list>
#include <random>
#include <string>
#include <string_view>
#include <vector>
#include "keycache.h"
#include "m_string.h"
#include "my_bitmap.h"
#include "my_check_opt.h"
#include "my_dbug.h"
#include "my_macros.h"
#include "my_pointer_arithmetic.h"
#include "my_psi_config.h"
#include "my_sqlcommand.h"
#include "my_sys.h"
#include "myisam.h"
#include "mysql/binlog/event/binlog_event.h"
#include "mysql/components/services/bits/psi_bits.h"
#include "mysql/components/services/log_builtins.h"
#include "mysql/components/services/log_shared.h"
#include "mysql/my_loglevel.h"
#include "mysql/plugin.h"
#include "mysql/psi/mysql_file.h"
#include "mysql/psi/mysql_mutex.h"
#include "mysql/psi/mysql_table.h"
#include "mysql/psi/mysql_transaction.h"
#include "mysql/psi/psi_table.h"
#include "mysql/service_mysql_alloc.h"
#include "mysql/strings/m_ctype.h"
#include "mysql_com.h"
#include "mysql_version.h"
#include "mysqld_error.h"
#include "prealloced_array.h"
#include "sd_notify.h"
#include "sql/auth/auth_common.h"
#include "sql/binlog.h"
#include "sql/check_stack.h"
#include "sql/clone_handler.h"
#include "sql/current_thd.h"
#include "sql/dd/cache/dictionary_client.h"
#include "sql/dd/dd.h"
#include "sql/dd/dictionary.h"
#include "sql/dd/types/table.h"
#include "sql/dd_table_share.h"
#include "sql/debug_sync.h"
#include "sql/derror.h"
#include "sql/error_handler.h"
#include "sql/field.h"
#include "sql/item.h"
#include "sql/join_optimizer/cost_model.h"
#include "sql/lock.h"
#include "sql/log.h"
#include "sql/log_event.h"
#include "sql/mdl.h"
#include "sql/mysqld.h"
#include "sql/opt_costconstantcache.h"
#include "sql/opt_costmodel.h"
#include "sql/opt_hints.h"
#include "sql/protocol.h"
#include "sql/psi_memory_key.h"
#include "sql/query_options.h"
#include "sql/record_buffer.h"
#include "sql/rpl_filter.h"
#include "sql/rpl_gtid.h"
#include "sql/rpl_handler.h"
#include "sql/rpl_replica_commit_order_manager.h"
#include "sql/rpl_rli.h"
#include "sql/rpl_write_set_handler.h"
#include "sql/sdi_utils.h"
#include "sql/session_tracker.h"
#include "sql/sql_base.h"
#include "sql/sql_bitmap.h"
#include "sql/sql_class.h"
#include "sql/sql_error.h"
#include "sql/sql_lex.h"
#include "sql/sql_parse.h"
#include "sql/sql_plugin.h"
#include "sql/sql_select.h"
#include "sql/sql_table.h"
#include "sql/strfunc.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/tc_log.h"
#include "sql/thr_malloc.h"
#include "sql/transaction.h"
#include "sql/transaction_info.h"
#include "sql/xa.h"
#include "sql/xa/sql_cmd_xa.h"
#include "sql_string.h"
#include "sql_tmp_table.h"
#include "string_with_len.h"
#include "template_utils.h"
#include "uniques.h"
Classes | |
struct | anonymous_namespace{handler.cc}::Storage_engine_identifier |
struct | st_sys_tbl_chk_params |
Structure used by SE during check for system table. More... | |
class | Ha_delete_table_error_handler |
struct | st_discover_args |
struct | st_find_files_args |
Call this function in order to give the handler the possibility to ask engine if there are any new tables that should be written to disk or any dropped tables that need to be removed from disk. More... | |
struct | st_table_exists_in_engine_args |
Ask handler if the table exists in engine. More... | |
struct | hton_list_st |
struct | binlog_func_st |
struct | binlog_log_query_st |
class | Binlog_log_row_cleanup |
The purpose of an instance of this class is to : More... | |
struct | blob_len_ptr |
This structure is a helper structure for passing the length and pointer of blob space allocated by storage engine. More... | |
struct | HTON_NOTIFY_PARAMS |
Auxiliary structure for passing information to notify_*_helper() functions. More... | |
Namespaces | |
namespace | anonymous_namespace{handler.cc} |
Macros | |
#define | MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) |
Instrumentation helper for table io_waits. More... | |
#define | MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) |
Instrumentation helper for table lock waits. More... | |
#define | SETMSG(nr, msg) handler_errmsgs[(nr)-HA_ERR_FIRST] = (msg) |
#define | AUTO_INC_DEFAULT_NB_ROWS 1 |
Update the auto_increment field if necessary. More... | |
#define | AUTO_INC_DEFAULT_NB_MAX_BITS 16 |
#define | AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1) |
#define | MAX_HTON_LIST_ST 63 |
Functions | |
static Prealloced_array< st_plugin_int *, PREALLOC_NUM_HA > | se_plugin_array (PSI_NOT_INSTRUMENTED) |
While we have legacy_db_type, we have this array to check for dups and to find handlerton from legacy_db_type. More... | |
st_plugin_int * | hton2plugin (uint slot) |
Maps from slot to plugin. May return NULL if plugin has been unloaded. More... | |
size_t | num_hton2plugins () |
Returns the size of the array holding pointers to plugins. More... | |
st_plugin_int * | insert_hton2plugin (uint slot, st_plugin_int *plugin) |
For unit testing. More... | |
st_plugin_int * | remove_hton2plugin (uint slot) |
const char * | ha_resolve_storage_engine_name (const handlerton *db_type) |
static bool | check_engine_system_table_handlerton (THD *, plugin_ref plugin, void *arg) |
Called for each SE to check if given db, tablename is a system table. More... | |
static int | ha_discover (THD *thd, const char *db, const char *name, uchar **frmblob, size_t *frmlen) |
Try to discover one table from handler(s). More... | |
static plugin_ref | ha_default_plugin (THD *thd) |
handlerton * | ha_default_handlerton (THD *thd) |
Return the default storage engine handlerton used for non-temp tables for thread. More... | |
static plugin_ref | ha_default_temp_plugin (THD *thd) |
handlerton * | ha_default_temp_handlerton (THD *thd) |
Return the default storage engine handlerton used for explicitly created temp tables for a thread. More... | |
plugin_ref | ha_resolve_by_name_raw (THD *thd, const LEX_CSTRING &name) |
Resolve handlerton plugin by name, without checking for "DEFAULT" or HTON_NOT_USER_SELECTABLE. More... | |
static const CHARSET_INFO & | hton_charset () |
plugin_ref | ha_resolve_by_name (THD *thd, const LEX_CSTRING *name, bool is_temp_table) |
Return the storage engine handlerton for the supplied name. More... | |
bool | ha_secondary_engine_supports_ddl (THD *thd, const LEX_CSTRING &secondary_engine) noexcept |
void | set_externally_disabled_storage_engine_names (const char *disabled_list) |
Read a comma-separated list of storage engine names. More... | |
static bool | is_storage_engine_name_externally_disabled (const char *name) |
bool | ha_is_externally_disabled (const handlerton &htnr) |
Returns true if the storage engine of the handlerton argument has been listed in the disabled_storage_engines system variable. More... | |
bool | ha_is_storage_engine_disabled (handlerton *se_handle) |
plugin_ref | ha_lock_engine (THD *thd, const handlerton *hton) |
handlerton * | ha_resolve_by_legacy_type (THD *thd, enum legacy_db_type db_type) |
handlerton * | ha_checktype (THD *thd, enum legacy_db_type database_type, bool no_substitute, bool report_error) |
Use other database handler if databasehandler is not compiled in. More... | |
handler * | get_new_handler (TABLE_SHARE *share, bool partitioned, MEM_ROOT *alloc, handlerton *db_type) |
Create handler object for the table in the storage engine. More... | |
static const char * | get_handler_errmsg (int nr) |
int | ha_init_errors (void) |
Register handler error messages for use with my_error(). More... | |
int | ha_finalize_handlerton (st_plugin_int *plugin) |
int | ha_initialize_handlerton (st_plugin_int *plugin) |
int | ha_init () |
void | ha_end () |
static bool | dropdb_handlerton (THD *, plugin_ref plugin, void *path) |
void | ha_drop_database (char *path) |
static bool | closecon_handlerton (THD *thd, plugin_ref plugin, void *) |
static bool | reset_plugin_vars_handlerton (THD *thd, plugin_ref plugin, void *) |
void | ha_reset_plugin_vars (THD *thd) |
void | ha_close_connection (THD *thd) |
static bool | kill_handlerton (THD *thd, plugin_ref plugin, void *) |
void | ha_kill_connection (THD *thd) |
static bool | pre_dd_shutdown_handlerton (THD *, plugin_ref plugin, void *) |
Invoke handlerton::pre_dd_shutdown() on a plugin. More... | |
void | ha_pre_dd_shutdown (void) |
Invoke handlerton::pre_dd_shutdown() on every storage engine plugin. More... | |
void | trans_register_ha (THD *thd, bool all, handlerton *ht_arg, const ulonglong *trxid) |
Register a storage engine for a transaction. More... | |
static uint | ha_check_and_coalesce_trx_read_only (THD *thd, Ha_trx_info_list &ha_list, bool all) |
Check if we can skip the two-phase commit. More... | |
bool | is_ha_commit_low_invoking_commit_order (THD *thd, bool all) |
Determines whether ha_commit_low may invoke commit ordering. More... | |
static bool | error_from_deferred_processing_se (const THD *thd) |
Check if error came from SE that defers processing to commit time. More... | |
std::pair< int, bool > | commit_owned_gtids (THD *thd, bool all) |
The function computes condition to call gtid persistor wrapper, and executes it. More... | |
int | ha_commit_trans (THD *thd, bool all, bool ignore_global_read_lock) |
int | ha_commit_low (THD *thd, bool all, bool run_after_commit) |
Commit the session's outstanding transaction. More... | |
int | ha_rollback_low (THD *thd, bool all) |
int | ha_rollback_trans (THD *thd, bool all) |
int | ha_commit_attachable (THD *thd) |
Commit the attachable transaction in storage engines. More... | |
bool | ha_rollback_to_savepoint_can_release_mdl (THD *thd) |
Check if all storage engines used in transaction agree that after rollback to savepoint it is safe to release MDL locks acquired after savepoint creation. More... | |
int | ha_rollback_to_savepoint (THD *thd, SAVEPOINT *sv) |
int | ha_prepare_low (THD *thd, bool all) |
Prepares the underlying transaction of the THD session object parameter in the storage engines that participate in the transaction. More... | |
int | ha_savepoint (THD *thd, SAVEPOINT *sv) |
int | ha_release_savepoint (THD *thd, SAVEPOINT *sv) |
static bool | snapshot_handlerton (THD *thd, plugin_ref plugin, void *arg) |
int | ha_start_consistent_snapshot (THD *thd) |
static bool | flush_handlerton (THD *, plugin_ref plugin, void *arg) |
bool | ha_flush_logs (bool binlog_group_flush) |
Flush the log(s) of storage engine(s). More... | |
const char * | get_canonical_filename (handler *file, const char *path, char *tmp_path) |
make canonical filename More... | |
int | ha_delete_table (THD *thd, handlerton *table_type, const char *path, const char *db, const char *alias, const dd::Table *table_def, bool generate_warning) |
Delete table from the storage engine. More... | |
ulonglong | compute_next_insert_id (ulonglong nr, struct System_variables *variables) |
Generate the next auto-increment number based on increment and offset. More... | |
ulonglong | prev_insert_id (ulonglong nr, struct System_variables *variables) |
Computes the largest number X: More... | |
const char * | table_case_name (const HA_CREATE_INFO *info, const char *name) |
void | print_keydup_error (TABLE *table, KEY *key, const char *msg, myf errflag, const char *org_table_name) |
Construct and emit duplicate key error message using information from table's record buffer. More... | |
void | print_keydup_error (TABLE *table, KEY *key, myf errflag, const char *org_table_name) |
Construct and emit duplicate key error message using information from table's record buffer. More... | |
int | check_table_for_old_types (const TABLE *table) |
Function identifies any old data type present in table. More... | |
int | ha_enable_transaction (THD *thd, bool on) |
Tell the storage engine that it is allowed to "disable transaction" in the handler. More... | |
int | ha_create_table (THD *thd, const char *path, const char *db, const char *table_name, HA_CREATE_INFO *create_info, bool update_create_info, bool is_temp_table, dd::Table *table_def) |
Initiates table-file and calls appropriate database-creator. More... | |
int | ha_create_table_from_engine (THD *thd, const char *db, const char *name) |
Try to discover table from engine. More... | |
bool | ha_check_if_table_exists (THD *thd, const char *db, const char *name, bool *exists) |
Try to find a table in a storage engine. More... | |
static bool | check_if_system_table (const char *db, const char *table_name, bool *is_sql_layer_system_table) |
Check if a table specified by name is a system table. More... | |
bool | ha_check_if_supported_system_table (handlerton *hton, const char *db, const char *table_name) |
Check if a given table is a system table. More... | |
static bool | rm_tmp_tables_handlerton (THD *thd, plugin_ref plugin, void *files) |
bool | ha_rm_tmp_tables (THD *thd, List< LEX_STRING > *files) |
Ask all SEs to drop all temporary tables which have been left from previous server run. More... | |
bool | default_rm_tmp_tables (handlerton *hton, THD *, List< LEX_STRING > *files) |
Default implementation for handlerton::rm_tmp_tables() method which simply removes all files from "files" list which have one of SE's extensions. More... | |
int | ha_init_key_cache (std::string_view, KEY_CACHE *key_cache) |
Init a key cache if it has not been initialized before. More... | |
int | ha_resize_key_cache (KEY_CACHE *key_cache) |
Resize key cache. More... | |
int | ha_change_key_cache (KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache) |
Move all tables from one key cache to another one. More... | |
static bool | discover_handlerton (THD *thd, plugin_ref plugin, void *arg) |
static bool | find_files_handlerton (THD *thd, plugin_ref plugin, void *arg) |
int | ha_find_files (THD *thd, const char *db, const char *path, const char *wild, bool dir, List< LEX_STRING > *files) |
static bool | table_exists_in_engine_handlerton (THD *thd, plugin_ref plugin, void *arg) |
int | ha_table_exists_in_engine (THD *thd, const char *db, const char *name) |
static bool | binlog_func_list (THD *, plugin_ref plugin, void *arg) |
Listing handlertons first to avoid recursive calls and deadlock. More... | |
static bool | binlog_func_foreach (THD *thd, binlog_func_st *bfn) |
int | ha_reset_logs (THD *thd) |
void | ha_reset_slave (THD *thd) |
void | ha_binlog_wait (THD *thd) |
int | ha_binlog_index_purge_file (THD *thd, const char *file) |
Inform storage engine(s) that a binary log file will be purged and any references to it should be removed. More... | |
void | ha_binlog_index_purge_wait (THD *thd) |
Request the storage engine to complete any operations that were initiated by ha_binlog_index_purge_file and which need to complete before PURGE BINARY LOGS completes. More... | |
static bool | binlog_log_query_handlerton2 (THD *thd, handlerton *hton, void *args) |
static bool | binlog_log_query_handlerton (THD *thd, plugin_ref plugin, void *args) |
void | ha_binlog_log_query (THD *thd, handlerton *hton, enum_binlog_command binlog_command, const char *query, size_t query_length, const char *db, const char *table_name) |
int | ha_binlog_end (THD *thd) |
static bool | acl_notify_handlerton (THD *thd, plugin_ref plugin, void *data) |
void | ha_acl_notify (THD *thd, class Acl_change_notification *data) |
static bool | key_uses_partial_cols (TABLE *table, uint keyno) |
Check if key has partially-covered columns. More... | |
static void | get_sort_and_sweep_cost (TABLE *table, ha_rows nrows, Cost_estimate *cost) |
void | get_sweep_read_cost (TABLE *table, ha_rows nrows, bool interrupted, Cost_estimate *cost) |
Get cost of reading nrows table records in a "disk sweep". More... | |
static bool | key_has_vcol (const KEY_PART_INFO *part, uint length) |
Check if one of the columns in a key is a virtual generated column. More... | |
static void | move_key_field_offsets (const key_range *range, const KEY_PART_INFO *key_part, ptrdiff_t diff) |
Change the offsets of all the fields in a key range. More... | |
uint | calculate_key_len (TABLE *table, uint key, key_part_map keypart_map) |
Calculates length of key. More... | |
static bool | exts_handlerton (THD *, plugin_ref plugin, void *arg) |
Returns a list of all known extensions. More... | |
TYPELIB * | ha_known_exts () |
static bool | stat_print (THD *thd, const char *type, size_t type_len, const char *file, size_t file_len, const char *status, size_t status_len) |
static bool | showstat_handlerton (THD *thd, plugin_ref plugin, void *arg) |
bool | ha_show_status (THD *thd, handlerton *db_type, enum ha_stat_type stat) |
static bool | check_table_binlog_row_based (THD *thd, TABLE *table) |
static int | write_locked_table_maps (THD *thd) |
Write table maps for all (manually or automatically) locked tables to the binary log. More... | |
int | binlog_log_row (TABLE *table, const uchar *before_record, const uchar *after_record, Log_func *log_func) |
static void | extract_blob_space_and_length_from_record_buff (const TABLE *table, const MY_BITMAP *const fields, blob_len_ptr *blob_len_ptr_array) |
Get the blob length and pointer of allocated space from the record buffer. More... | |
static void | copy_blob_data (const TABLE *table, const MY_BITMAP *const fields, blob_len_ptr *blob_len_ptr_array) |
Copy the value of BLOB virtual generated columns into the space allocated by storage engine. More... | |
static bool | my_eval_gcolumn_expr_helper (THD *thd, TABLE *table, const MY_BITMAP *const fields, uchar *record, bool in_purge, const char **mv_data_ptr, ulong *mv_length) |
static bool | notify_exclusive_mdl_helper (THD *thd, plugin_ref plugin, void *arg) |
bool | ha_notify_exclusive_mdl (THD *thd, const MDL_key *mdl_key, ha_notification_type notification_type, bool *victimized) |
Notify/get permission from all interested storage engines before acquisition or after release of exclusive metadata lock on object represented by key. More... | |
static bool | notify_table_ddl_helper (THD *thd, plugin_ref plugin, void *arg) |
bool | ha_notify_table_ddl (THD *thd, const MDL_key *mdl_key, ha_notification_type notification_type, ha_ddl_type ddl_type, const char *old_db_name, const char *old_table_name, const char *new_db_name, const char *new_table_name) |
Notify/get permission from all interested storage engines before or after executed DDL (ALTER TABLE, RENAME TABLE, TRUNCATE TABLE) on the table identified by key. More... | |
bool | set_tx_isolation (THD *thd, enum_tx_isolation tx_isolation, bool one_shot) |
Set the transaction isolation level for the next transaction and update session tracker information about the transaction isolation level. More... | |
static bool | post_recover_handlerton (THD *, plugin_ref plugin, void *) |
void | ha_post_recover (void) |
Perform SE-specific cleanup after recovery of transactions. More... | |
const handlerton * | SecondaryEngineHandlerton (const THD *thd) |
Returns the handlerton of the secondary engine that is used in the session, or nullptr if a secondary engine is not used. More... | |
static bool | is_reserved_db_name_handlerton (THD *, plugin_ref plugin, void *name) |
Checks if the database name is reserved word used by SE by invoking the handlerton method. More... | |
bool | ha_check_reserved_db_name (const char *name) |
Check if the database name is reserved word used by SE. More... | |
bool | is_index_access_error (int error) |
Check whether an error is index access error or not after an index read. More... | |
Variables | |
static Prealloced_array< bool, PREALLOC_NUM_HA > | builtin_htons (PSI_NOT_INSTRUMENTED) |
Array allowing to check if handlerton is builtin without acquiring LOCK_plugin. More... | |
static handlerton * | installed_htons [128] |
ulong | total_ha_2pc = 0 |
ulong | savepoint_alloc_size = 0 |
const Storage_engine_identifier | anonymous_namespace{handler.cc}::se_names [] |
const auto | anonymous_namespace{handler.cc}::se_names_end = std::end(se_names) |
std::vector< std::string > | anonymous_namespace{handler.cc}::disabled_se_names |
const char * | ha_row_type [] |
const char * | tx_isolation_names [] |
TYPELIB | tx_isolation_typelib |
static const char ** | handler_errmsgs |
Implements functions in the handler interface that are shared between all storage engines.
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1) |
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16 |
#define AUTO_INC_DEFAULT_NB_ROWS 1 |
Update the auto_increment field if necessary.
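Updates columns with type NEXT_NUMBER if:
- The column value is set to NULL.
- The column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not set. In the future we will only set NEXT_NUMBER fields if one sets them to NULL (or they are not included in the insert list).
In those cases, we check if the currently reserved interval still has values we have not used. If yes, we pick the smallest one and use it. Otherwise:
- If a list of intervals has been provided to the statement via SET INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the first unused interval from this list and consider it as reserved.
- Otherwise we set the column for the first row to the value next_insert_id(get_auto_increment(column)), which is usually max-used-column-value+1. We call get_auto_increment() for the first row in a multi-row statement; get_auto_increment() will tell us the interval of values it reserved for us.
In both cases, for the following rows we use those reserved values without calling the handler again (we just progress in the interval, computing each new value from the previous one) until we have exhausted them; then we either take the next provided interval or call get_auto_increment() again to reserve a new interval.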
In both cases, the reserved intervals are remembered in thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based binlogging; the last reserved interval is remembered in auto_inc_interval_for_cur_row. The number of reserved intervals is remembered in auto_inc_intervals_count. It differs from the number of elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the latter list is cumulative over all statements forming one binlog event (when stored functions and triggers are used), and collapses two contiguous intervals in one (see its append() method).
The idea is that generated auto_increment values are predictable and independent of the column values in the table. This is needed to be able to replicate into a table that already has rows with a higher auto-increment value than the one that is inserted.
After we have already generated an auto-increment number and the user inserts a column with a higher value than the last used one, we will start counting from the inserted value.
This function's "outputs" are: the table's auto_increment field is filled with a value, thd->next_insert_id is filled with the value to use for the next row, if a value was autogenerated for the current row it is stored in thd->insert_id_for_cur_row, if get_auto_increment() was called thd->auto_inc_interval_for_cur_row is modified, if that interval is not present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to this list.
0 | ok |
HA_ERR_AUTOINC_READ_FAILED | get_auto_increment() was called and returned ~(ulonglong) 0 |
HA_ERR_AUTOINC_ERANGE | storing value in field caused strict mode failure. |
#define MAX_HTON_LIST_ST 63 |
#define MYSQL_TABLE_IO_WAIT | ( | OP, | |
INDEX, | |||
RESULT, | |||
PAYLOAD | |||
) |
Instrumentation helper for table io_waits.
Note that this helper is intended to be used from within the handler class only, as it uses members from handler.
Performance schema events are instrumented as follows:
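- In non-batch mode, one event is generated per call.
- In batch mode, the number of rows affected is saved in m_psi_numrows, so that end_psi_batch_mode() generates a single event for the batch.
OP | the table operation to be performed |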
INDEX | the table index used if any, or MAX_KEY. |
RESULT | the result of the table operation performed |
PAYLOAD | instrumented code to execute |
#define MYSQL_TABLE_LOCK_WAIT | ( | OP, | |
FLAGS, | |||
PAYLOAD | |||
) |
Instrumentation helper for table lock waits.
OP | the table operation to be performed |
FLAGS | per table operation flags. |
PAYLOAD | the code to instrument. |
#define SETMSG | ( | nr, | |
msg | |||
) | handler_errmsgs[(nr)-HA_ERR_FIRST] = (msg) |
|
static |
|
static |
|
static |
Listing handlertons first to avoid recursive calls and deadlock.
|
static |
|
static |
int binlog_log_row | ( | TABLE * | table, |
const uchar * | before_record, | ||
const uchar * | after_record, | ||
Log_func * | log_func | ||
) |
uint calculate_key_len | ( | TABLE * | table, |
uint | key, | ||
key_part_map | keypart_map | ||
) |
Calculates length of key.
Given a key index and a map of key parts return length of buffer used by key parts.
table | Table containing the key |
key | Key index |
keypart_map | Which key parts are used |
|
static |
Called for each SE to check if given db, tablename is a system table.
The primary purpose of introducing this function is to prevent system tables from being created in, or moved to, undesired storage engines.
plugin | Points to specific SE. |
arg | Is of type struct st_sys_tbl_chk_params. |
true | There was a match found. This will stop doing checks with other SE's. |
false | There was no match found. Other SE's will be checked to find a match. |
|
static |
Check if a table specified by name is a system table.
db | Database name for the table. | |
table_name | Table name to be checked. | |
[out] | is_sql_layer_system_table | True if a system table belongs to sql_layer. |
true | If the table name is a system table. |
false | If the table name is a user-level table. |
int check_table_for_old_types | ( | const TABLE * | table | ) |
Function identifies any old data type present in table.
This function was handler::check_old_types(). Function is not part of SE API. It is now converted to auxiliary standalone function.
[in] | table | TABLE object |
0 | ON SUCCESS |
error code | ON FAILURE |
|
static |
std::pair< int, bool > commit_owned_gtids | ( | THD * | thd, |
bool | all | ||
) |
The function computes condition to call gtid persistor wrapper, and executes it.
It is invoked at committing a statement or transaction, including XA, and also at XA prepare handling.
thd | Thread context. |
all | The execution scope, true for the transaction one, false for the statement one. |
0 | Ok |
!0 | Error |
Owned GTID release status
true | remove the GTID owned by thread from owned GTIDs |
false | removal of the GTID owned by thread from owned GTIDs is not required |
|
inline |
Generate the next auto-increment number based on increment and offset.
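Computes the lowest number that is strictly greater than nr and of the form auto_increment_offset + N * auto_increment_increment.
In most cases increment = offset = 1, in which case we get:
1,2,3,4,5,...
If increment=10 and offset=5 and previous number is 1, we get:
1,5,15,25,35,...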
|
static |
Copy the value of BLOB virtual generated columns into the space allocated by storage engine.
This is because the table is closed after evaluating the value. In order to keep the BLOB value after the table is closed, we have to copy the value into the space that the storage engine has prepared for it.
table | pointer of the table to be operated on |
fields | bitmap of field index of evaluated generated column |
blob_len_ptr_array | array of length and pointer of allocated space by storage engine. |
bool default_rm_tmp_tables | ( | handlerton * | hton, |
THD * | thd, | ||
List< LEX_STRING > * | files | ||
) |
Default implementation for handlerton::rm_tmp_tables() method which simply removes all files from "files" list which have one of SE's extensions.
This implementation corresponds to default implementation of handler::delete_table() method.
|
static |
|
static |
|
static |
Check if error came from SE that defers processing to commit time.
Deferred transaction processing is common in distributed SE where row changes are processed in parallel during commit, this is essential for performance but results in less localised error handling due to 'lazy evaluation'. Thus errors will potentially show up as part of COMMIT processing (where all pending work must be finalised). This is a major difference compared to local SE that process row changes serially, in that scenario defined operations are processed as they are defined and errors can be handled directly.
[in] | thd | The THD pointer |
true | Error came from SE that uses deferred processing |
|
static |
Get the blob length and pointer of allocated space from the record buffer.
While evaluating the blob virtual generated columns, the blob space will be allocated by the server. In order to keep the blob data after the table is closed, we need to write the data into a specified space allocated by the storage engine. Here, we have to extract the space pointer and length from the record buffer. After we get the value of virtual generated columns, copy the data into the specified space and store it in the record buffer (see copy_blob_data()).
table | the pointer of table | |
fields | bitmap of field index of evaluated generated column | |
[out] | blob_len_ptr_array | an array to record the length and pointer of allocated space by storage engine. |
|
static |
Returns a list of all known extensions.
No mutexes; the worst-case race is a minor surplus memory allocation. We have to recreate the extension map if mysqld is restarted (for example within libmysqld).
pointer | pointer to TYPELIB structure |
|
static |
|
static |
const char * get_canonical_filename | ( | handler * | file, |
const char * | path, | ||
char * | tmp_path | ||
) |
make canonical filename
[in] | file | table handler |
[in] | path | original path |
[out] | tmp_path | buffer for canonized path |
Lower case db name and table name path parts for non file based tables when lower_case_table_names is 2 (store as is, compare in lower case). Filesystem path prefix (mysql_data_home or tmpdir) is left intact.
canonized | path |
|
static |
handler * get_new_handler | ( | TABLE_SHARE * | share, |
bool | partitioned, | ||
MEM_ROOT * | alloc, | ||
handlerton * | db_type | ||
) |
Create handler object for the table in the storage engine.
share | TABLE_SHARE for the table, can be NULL if caller didn't perform full-blown open of table definition. |
partitioned | Indicates whether table is partitioned. |
alloc | Memory root to be used for allocating handler object. |
db_type | Table's storage engine. |
|
static |
void get_sweep_read_cost | ( | TABLE * | table, |
ha_rows | nrows, | ||
bool | interrupted, | ||
Cost_estimate * | cost | ||
) |
Get cost of reading nrows table records in a "disk sweep".
A disk sweep read is a sequence of handler->rnd_pos(rowid) calls made for an ordered sequence of rowids.
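We take into account that some of the records might be in a memory buffer while others need to be read from a secondary storage device. The model for this assumes hard disk IO. A disk read is performed as follows:
1. The disk head is moved to the needed cylinder.
2. The controller waits for the plate to rotate.
3. The data is transferred.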
Time to do #3 is insignificant compared to #2+#1.
Time to move the disk head is proportional to head travel distance.
Time to wait for the plate to rotate depends on whether the disk head was moved or not.
If disk head wasn't moved, the wait time is proportional to distance between the previous block and the block we're reading.
If the head was moved, we don't know how much we'll need to wait for the plate to rotate. We assume the wait time to be a variate with a mean of 0.5 of full rotation time.
Our cost units are "random disk seeks". The cost of a random disk seek is actually not a constant; it depends on the range of cylinders we're going to access. We make it constant by introducing a fuzzy concept of "typical datafile length" (it's fuzzy as it's hard to tell whether it should include index file, temp.tables etc). Then random seek cost is:
1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
We define half_rotation_cost as disk_seek_base_cost() (see Cost_model_server::disk_seek_base_cost()).
table | Table to be accessed | |
nrows | Number of rows to retrieve | |
interrupted | true <=> Assume that the disk sweep will be interrupted by other disk IO. false - otherwise. | |
[out] | cost | the cost |
void ha_acl_notify | ( | THD * | thd, |
class Acl_change_notification * | data | ||
) |
int ha_binlog_end | ( | THD * | thd | ) |
int ha_binlog_index_purge_file | ( | THD * | thd, |
const char * | file | ||
) |
Inform storage engine(s) that a binary log file will be purged and any references to it should be removed.
The function is called for all purged files, regardless if it is an explicit PURGE BINARY LOGS statement, or an automatic purge performed by the server.
Use the ha_binlog_index_purge_wait callback to wait for completion.
thd | Thread handle of session purging file. The nullptr value indicates that purge is done at server startup. |
file | Name of file being purged. |
void ha_binlog_index_purge_wait | ( | THD * | thd | ) |
Request the storage engine to complete any operations that were initiated by ha_binlog_index_purge_file
and which need to complete before PURGE BINARY LOGS completes.
The function is called only from PURGE BINARY LOGS. Each PURGE BINARY LOGS statement will result in 0, 1 or more calls to ha_binlog_index_purge_file
, followed by exactly 1 call to ha_binlog_index_purge_wait
.
thd | Thread handle of session. |
void ha_binlog_log_query | ( | THD * | thd, |
handlerton * | hton, | ||
enum_binlog_command | binlog_command, | ||
const char * | query, | ||
size_t | query_length, | ||
const char * | db, | ||
const char * | table_name | ||
) |
void ha_binlog_wait | ( | THD * | thd | ) |
Move all tables from one key cache to another one.
|
static |
Check if we can skip the two-phase commit.
A helper function to evaluate if two-phase commit is mandatory. As a side effect, propagates the read-only/read-write flags of the statement transaction to its enclosing normal transaction.
If we have at least two engines with read-write changes we must run a two-phase commit. Otherwise we can run several independent commits as the only transactional engine has read-write changes and others are read-only.
0 | All engines are read-only. |
1 | We have the only engine with read-write changes. |
>1 | More than one engine has read-write changes. Note: return value might NOT be the exact number of engines with read-write changes. |
bool ha_check_if_supported_system_table | ( | handlerton * | hton, |
const char * | db, | ||
const char * | table_name | ||
) |
Check if a given table is a system table.
The primary purpose of introducing this function is to prevent system tables from being created in, or moved to, undesired storage engines.
hton | Handlerton of new engine. |
db | Database name. |
table_name | Table name to be checked. |
true | If the table name is a valid system table or if it's a valid user table. |
false | If the table name is a system table name and does not belong to engine specified in the command. |
bool ha_check_if_table_exists | ( | THD * | thd, |
const char * | db, | ||
const char * | name, | ||
bool * | exists | ||
) |
Try to find a table in a storage engine.
thd | Thread handle | |
db | Normalized table schema name | |
name | Normalized table name. | |
[out] | exists | Only valid if the function succeeded. |
true | An error is found |
false | Success, check *exists |
bool ha_check_reserved_db_name | ( | const char * | name | ) |
Check if the database name is a reserved word used by SE.
name | Database name. |
true | If the name is a reserved word. |
false | If the name is not a reserved word. |
handlerton * ha_checktype | ( | THD * | thd, |
enum legacy_db_type | database_type, | ||
bool | no_substitute, | ||
bool | report_error | ||
) |
Use other database handler if database handler is not compiled in.
void ha_close_connection | ( | THD * | thd | ) |
int ha_commit_attachable | ( | THD * | thd | ) |
Commit the attachable transaction in storage engines.
thd | Current thread |
0 | - Success |
non-0 | - Failure |
int ha_commit_low | ( | THD * | thd, |
bool | all, | ||
bool | run_after_commit | ||
) |
Commit the session's outstanding transaction.
[in] | thd | Thread handle. |
[in] | all | Is set in case of explicit commit (COMMIT statement), or implicit commit issued by DDL. Is not set when called at the end of statement, even if autocommit=1. |
[in] | run_after_commit | True by default, otherwise, does not execute the after_commit hook in the function. |
int ha_commit_trans | ( | THD * | thd, |
bool | all, | ||
bool | ignore_global_read_lock | ||
) |
[in] | thd | Thread handle. |
[in] | all | Session transaction if true, statement otherwise. |
[in] | ignore_global_read_lock | Allow commit to complete even if a global read lock is active. This can be used to allow changes to internal tables (e.g. slave status tables). |
0 | ok |
1 | transaction was rolled back |
2 | error during commit, data may be inconsistent |
int ha_create_table | ( | THD * | thd, |
const char * | path, | ||
const char * | db, | ||
const char * | table_name, | ||
HA_CREATE_INFO * | create_info, | ||
bool | update_create_info, | ||
bool | is_temp_table, | ||
dd::Table * | table_def | ||
) |
Initiates table-file and calls appropriate database-creator.
thd | Thread context. |
path | Path to table file (without extension). |
db | Database name. |
table_name | Table name. |
create_info | HA_CREATE_INFO describing table. |
update_create_info | Indicates that create_info needs to be updated from table share. |
is_temp_table | Indicates that this is temporary table (for cases when this info is not available from HA_CREATE_INFO). |
table_def | Data-dictionary object describing table to be used for table creation. Can be adjusted by storage engine if it supports atomic DDL. For non-temporary tables these changes will be saved to the data-dictionary by this call. |
0 | ok |
1 | error |
int ha_create_table_from_engine | ( | THD * | thd, |
const char * | db, | ||
const char * | name | ||
) |
Try to discover table from engine.
-1 | Table did not exist |
0 | Table created ok |
>0 | Error, table existed but could not be created |
handlerton * ha_default_handlerton | ( | THD * | thd | ) |
Return the default storage engine handlerton used for non-temp tables for thread.
SYNOPSIS ha_default_handlerton(thd) thd current thread
RETURN pointer to handlerton
|
static |
handlerton * ha_default_temp_handlerton | ( | THD * | thd | ) |
Return the default storage engine handlerton used for explicitly created temp tables for a thread.
SYNOPSIS ha_default_temp_handlerton(thd) thd current thread
RETURN pointer to handlerton
|
static |
int ha_delete_table | ( | THD * | thd, |
handlerton * | table_type, | ||
const char * | path, | ||
const char * | db, | ||
const char * | alias, | ||
const dd::Table * | table_def, | ||
bool | generate_warning | ||
) |
Delete table from the storage engine.
thd | Thread context. |
table_type | Handlerton for table's SE. |
path | Path to table (without extension). |
db | Table database. |
alias | Table name. |
table_def | dd::Table object describing the table. |
generate_warning | Indicates whether errors during deletion should be reported as warnings. |
|
static |
Try to discover one table from handler(s).
[in] | thd | Thread context. |
[in] | db | Schema of table |
[in] | name | Name of table |
[out] | frmblob | Pointer to blob with table definition. |
[out] | frmlen | Length of the returned table definition blob |
-1 | Table did not exist |
0 | OK. Table could be discovered from SE. The *frmblob and *frmlen may be set if returning a blob which should be installed into data dictionary by the caller. |
>0 | error. frmblob and frmlen may not be set |
void ha_drop_database | ( | char * | path | ) |
int ha_enable_transaction | ( | THD * | thd, |
bool | on | ||
) |
Tell the storage engine that it is allowed to "disable transaction" in the handler.
It is a hint that ACID is not required - it is used in NDB for ALTER TABLE, for example, when data are copied to temporary table. A storage engine may treat this hint any way it likes. NDB for example starts to commit every now and then automatically. This hint can be safely ignored.
void ha_end | ( | ) |
int ha_finalize_handlerton | ( | st_plugin_int * | plugin | ) |
int ha_find_files | ( | THD * | thd, |
const char * | db, | ||
const char * | path, | ||
const char * | wild, | ||
bool | dir, | ||
List< LEX_STRING > * | files | ||
) |
bool ha_flush_logs | ( | bool | binlog_group_flush = false | ) |
Flush the log(s) of storage engine(s).
binlog_group_flush | true if we got invoked by binlog group commit during flush stage, false in other cases. |
false | Succeed |
true | Error |
int ha_init | ( | void | ) |
int ha_init_errors | ( | void | ) |
Register handler error messages for use with my_error().
0 | OK |
!=0 | Error |
int ha_init_key_cache | ( | std::string_view | name, |
KEY_CACHE * | key_cache | ||
) |
Init a key cache if it has not been initialized before.
int ha_initialize_handlerton | ( | st_plugin_int * | plugin | ) |
bool ha_is_externally_disabled | ( | const handlerton & | htnr | ) |
Returns true if the storage engine of the handlerton argument has been listed in the disabled_storage_engines system variable.
bool ha_is_storage_engine_disabled | ( | handlerton * | se_handle | ) |
void ha_kill_connection | ( | THD * | thd | ) |
TYPELIB * ha_known_exts | ( | ) |
plugin_ref ha_lock_engine | ( | THD * | thd, |
const handlerton * | hton | ||
) |
bool ha_notify_exclusive_mdl | ( | THD * | thd, |
const MDL_key * | mdl_key, | ||
ha_notification_type | notification_type, | ||
bool * | victimized | ||
) |
Notify/get permission from all interested storage engines before acquisition or after release of exclusive metadata lock on object represented by key.
thd | Thread context. |
mdl_key | MDL key identifying object on which exclusive lock is to be acquired/was released. |
notification_type | Indicates whether this is pre-acquire or post-release notification. |
victimized | 'true' if locking failed as we were selected as a victim in order to avoid possible deadlocks. |
See
bool ha_notify_table_ddl | ( | THD * | thd, |
const MDL_key * | mdl_key, | ||
ha_notification_type | notification_type, | ||
ha_ddl_type | ddl_type, | ||
const char * | old_db_name, | ||
const char * | old_table_name, | ||
const char * | new_db_name, | ||
const char * | new_table_name | ||
) |
Notify/get permission from all interested storage engines before or after executed DDL (ALTER TABLE, RENAME TABLE, TRUNCATE TABLE) on the table identified by key.
thd | Thread context. |
mdl_key | MDL key identifying table. |
notification_type | Indicates whether this is pre-DDL or post-DDL notification. |
old_db_name | Old db name, used in RENAME DDL |
old_table_name | Old table name, used in RENAME DDL |
new_db_name | New db name, used in RENAME DDL |
new_table_name | New table name, used in RENAME DDL |
See
void ha_post_recover | ( | ) |
Perform SE-specific cleanup after recovery of transactions.
void ha_pre_dd_shutdown | ( | void | ) |
Invoke handlerton::pre_dd_shutdown() on every storage engine plugin.
int ha_prepare_low | ( | THD * | thd, |
bool | all | ||
) |
Prepares the underlying transaction of the THD session object parameter in the storage engines that participate in the transaction.
In case of failure, an error will be emitted by the function in the case of internally coordinated transactions. In the case of externally coordinated transactions (XA), the error treatment must follow the XA/Open specification and is handled by the Sql_cmd_xa_prepare
class.
thd | The THD session object holding the transaction to be prepared. |
all | Whether or not the prepare regards a full transaction or the statement being executed. |
int ha_reset_logs | ( | THD * | thd | ) |
void ha_reset_plugin_vars | ( | THD * | thd | ) |
void ha_reset_slave | ( | THD * | thd | ) |
int ha_resize_key_cache | ( | KEY_CACHE * | key_cache | ) |
Resize key cache.
handlerton * ha_resolve_by_legacy_type | ( | THD * | thd, |
enum legacy_db_type | db_type | ||
) |
plugin_ref ha_resolve_by_name | ( | THD * | thd, |
const LEX_CSTRING * | name, | ||
bool | is_temp_table | ||
) |
Return the storage engine handlerton for the supplied name.
thd | Current thread. May be nullptr, (e.g. during initialize). |
name | Name of storage engine. |
is_temp_table | true if table is a temporary table. |
plugin_ref ha_resolve_by_name_raw | ( | THD * | thd, |
const LEX_CSTRING & | name | ||
) |
Resolve handlerton plugin by name, without checking for "DEFAULT" or HTON_NOT_USER_SELECTABLE.
thd | Thread context. |
name | Plugin name. |
const char * ha_resolve_storage_engine_name | ( | const handlerton * | db_type | ) |
bool ha_rm_tmp_tables | ( | THD * | thd, |
List< LEX_STRING > * | files | ||
) |
Ask all SEs to drop all temporary tables which have been left from previous server run.
Used on server start-up.
[in] | thd | Thread context. |
[in,out] | files | List of files in directories for temporary files which match tmp_file_prefix and thus can belong to temporary tables. If any SE recognizes some file as belonging to temporary table in this SE and deletes the file it is also supposed to remove file from this list. |
int ha_rollback_low | ( | THD * | thd, |
bool | all | ||
) |
bool ha_rollback_to_savepoint_can_release_mdl | ( | THD * | thd | ) |
Check if all storage engines used in transaction agree that after rollback to savepoint it is safe to release MDL locks acquired after savepoint creation.
thd | The client thread that executes the transaction. |
Checking whether it is safe to release metadata locks after rollback to savepoint in all the storage engines that are part of the transaction.
int ha_rollback_trans | ( | THD * | thd, |
bool | all | ||
) |
|
noexcept |
bool ha_show_status | ( | THD * | thd, |
handlerton * | db_type, | ||
enum ha_stat_type | stat | ||
) |
int ha_start_consistent_snapshot | ( | THD * | thd | ) |
int ha_table_exists_in_engine | ( | THD * | thd, |
const char * | db, | ||
const char * | name | ||
) |
st_plugin_int * hton2plugin | ( | uint | slot | ) |
Maps from slot to plugin. May return NULL if plugin has been unloaded.
|
static |
st_plugin_int * insert_hton2plugin | ( | uint | slot, |
st_plugin_int * | plugin | ||
) |
For unit testing.
Insert plugin into arbitrary slot in array. Remove plugin from arbitrary slot in array.
bool is_ha_commit_low_invoking_commit_order | ( | THD * | thd, |
bool | all | ||
) |
Determines whether ha_commit_low may invoke commit ordering.
[in] | thd | Thread handle. |
[in] | all | Is set in case of explicit commit (COMMIT statement), or implicit commit issued by DDL. Is not set when called at the end of statement, even if autocommit=1. |
true | ha_commit_low invokes commit order |
false | ha_commit_low does not invoke commit order |
Preserve externalization and persistence order for applier threads. The conditions should be understood as follows:
Note: the calls to Commit_order_manager::wait/wait_and_finish() will be no-op for threads other than replication applier threads.
bool is_index_access_error | ( | int | error | ) |
Check whether an error is an index access error or not after an index read.
Any error other than HA_ERR_END_OF_FILE or HA_ERR_KEY_NOT_FOUND will stop the next index read.
error | Handler error code. |
true | if error is different from HA_ERR_END_OF_FILE or HA_ERR_KEY_NOT_FOUND. |
false | if error is HA_ERR_END_OF_FILE or HA_ERR_KEY_NOT_FOUND. |
|
static |
Checks if the database name is a reserved word used by SE by invoking the handlerton method.
plugin | SE plugin. |
name | Database name. |
true | If the name is a reserved word. |
false | If the name is not a reserved word. |
|
static |
|
static |
Check if one of the columns in a key is a virtual generated column.
part | the first part of the key to check |
length | the length of the key |
true | if the key contains a virtual generated column |
false | if the key does not contain a virtual generated column |
|
static |
Check if key has partially-covered columns.
We can't use DS-MRR to perform range scans when the ranges are over partially-covered keys, because we'll not have full key part values (we'll have their prefixes from the index) and will not be able to check if we've reached the end of the range.
table | Table to check keys for |
keyno | Key to check |
true | Yes |
false | No |
|
static |
|
inlinestatic |
Change the offsets of all the fields in a key range.
range | the key range |
key_part | the first key part |
diff | how much to change the offsets with |
|
static |
|
static |
|
static |
size_t num_hton2plugins | ( | ) |
Returns the size of the array holding pointers to plugins.
|
static |
|
static |
Invoke handlerton::pre_dd_shutdown() on a plugin.
plugin | storage engine plugin |
false | (always) |
|
inline |
Computes the largest number X:
SYNOPSIS prev_insert_id nr Number to "round down" variables variables struct containing auto_increment_increment and auto_increment_offset
RETURN The number X if it exists, "nr" otherwise.
void print_keydup_error | ( | TABLE * | table, |
KEY * | key, | ||
const char * | msg, | ||
myf | errflag, | ||
const char * | org_table_name | ||
) |
Construct and emit duplicate key error message using information from table's record buffer.
table | TABLE object which record buffer should be used as source for column values. |
key | Key description. |
msg | Error message template to which key value should be added. |
errflag | Flags for my_error() call. |
org_table_name | The original table name (if any) |
Construct and emit duplicate key error message using information from table's record buffer.
st_plugin_int * remove_hton2plugin | ( | uint | slot | ) |
|
static |
|
static |
|
static |
While we have legacy_db_type, we have this array to check for dups and to find handlerton from legacy_db_type.
Remove when legacy_db_type is finally gone
const handlerton * SecondaryEngineHandlerton | ( | const THD * | thd | ) |
Returns the handlerton of the secondary engine that is used in the session, or nullptr if a secondary engine is not used.
void set_externally_disabled_storage_engine_names | ( | const char * | disabled_list | ) |
Read a comma-separated list of storage engine names.
Look up each in the known list of canonical and legacy names. In case of a match, add both the canonical and the legacy name to disabled_se_names, which is a static vector of disabled storage engine names. If there is no match, the unmodified name is added to the vector.
bool set_tx_isolation | ( | THD * | thd, |
enum_tx_isolation | tx_isolation, | ||
bool | one_shot | ||
) |
Set the transaction isolation level for the next transaction and update session tracker information about the transaction isolation level.
thd | THD session setting the tx_isolation. |
tx_isolation | The isolation level to be set. |
one_shot | True if the isolation level should be restored to session default after finishing the transaction. |
|
static |
|
static |
|
static |
const char * table_case_name | ( | const HA_CREATE_INFO * | info, |
const char * | name | ||
) |
|
static |
void trans_register_ha | ( | THD * | thd, |
bool | all, | ||
handlerton * | ht_arg, | ||
const ulonglong * | trxid | ||
) |
Register a storage engine for a transaction.
In each client connection, MySQL maintains two transactional states:
"Statement transaction" is a non-standard term that comes from the times when MySQL supported BerkeleyDB storage engine.
First of all, it should be said that in BerkeleyDB auto-commit mode auto-commits operations that are atomic to the storage engine itself, such as a write of a record, and are too high-granular to be atomic from the application perspective (MySQL). One SQL statement could involve many BerkeleyDB auto-committed operations and thus BerkeleyDB auto-commit was of little use to MySQL.
Secondly, instead of SQL standard savepoints, BerkeleyDB provided the concept of "nested transactions". In a nutshell, transactions could be arbitrarily nested, but when the parent transaction was committed or aborted, all its child (nested) transactions were committed or aborted as well. Commit of a nested transaction, in turn, made its changes visible, but not durable: it destroyed the nested transaction, all its changes would become available to the parent and currently active nested transactions of this parent.
So the mechanism of nested transactions was employed to provide "all or nothing" guarantee of SQL statements required by the standard. A nested transaction would be created at start of each SQL statement, and destroyed (committed or aborted) at statement end. Such nested transaction was internally referred to as a "statement transaction" and gave birth to the term.
(Historical note ends)
Since then a statement transaction is started for each statement that accesses transactional tables or uses the binary log. If the statement succeeds, the statement transaction is committed. If the statement fails, the transaction is rolled back. Commits of statement transactions are not durable – each such transaction is nested in the normal transaction, and if the normal transaction is rolled back, the effects of all enclosed statement transactions are undone as well. Technically, a statement transaction can be viewed as a savepoint which is maintained automatically in order to make effects of one statement atomic.
The normal transaction is started by the user and is ended usually upon a user request as well. The normal transaction encloses transactions of all statements issued between its beginning and its end. In autocommit mode, the normal transaction is equivalent to the statement transaction.
Since MySQL supports PSEA (pluggable storage engine architecture), more than one transactional engine can be active at a time. Hence transactions, from the server point of view, are always distributed. In particular, transactional state is maintained independently for each engine. In order to commit a transaction the two phase commit protocol is employed.
Not all statements are executed in context of a transaction. Administrative and status information statements do not modify engine data, and thus do not start a statement transaction and also have no effect on the normal transaction. Examples of such statements are SHOW STATUS and RESET REPLICA.
Similarly DDL statements are not transactional, and therefore a transaction is [almost] never started for a DDL statement. The difference between a DDL statement and a purely administrative statement though is that a DDL statement always commits the current transaction before proceeding, if there is any.
Finally, SQL statements that work with non-transactional engines also have no effect on the transaction state of the connection. Even though they are written to the binary log, and the binary log is, overall, transactional, the writes are done in "write-through" mode, directly to the binlog file, followed by an OS cache sync, in other words, bypassing the binlog undo log (translog). They do not commit the current normal transaction. A failure of a statement that uses non-transactional tables would cause a rollback of the statement transaction, but in case no transactional tables are used, no statement transaction is started.
The server stores its transaction-related data in thd->transaction. This structure has two members of type THD_TRANS. These members correspond to the statement and normal transactions respectively:
In autocommit mode thd->transaction.all is empty. Instead, data of thd->transaction.stmt is used to commit/rollback the normal transaction.
The list of registered engines has a few important properties:
When a new connection is established, thd->transaction members are initialized to an empty state. If a statement uses any tables, all affected engines are registered in the statement engine list. In non-autocommit mode, the same engines are registered in the normal transaction list. At the end of the statement, the server issues a commit or a roll back for all engines in the statement list. At this point transaction flags of an engine, if any, are propagated from the statement list to the list of the normal transaction. When commit/rollback is finished, the statement list is cleared. It will be filled in again by the next statement, and emptied again at the next statement's end.
The normal transaction is committed in a similar way (by going over all engines in thd->transaction.all list) but at different times:
The normal transaction can be rolled back as well:
As follows from the use cases above, the normal transaction is never committed when there is an outstanding statement transaction. In most cases there is no conflict, since commits of the normal transaction are issued by a stand-alone administrative or DDL statement, thus no outstanding statement transaction of the previous statement exists. Besides, all statements that manipulate the normal transaction are prohibited in stored functions and triggers, therefore no conflicting situation can occur in a sub-statement either. The remaining rare cases when the server explicitly has to commit the statement transaction prior to committing the normal one cover error-handling scenarios (see for example SQLCOM_LOCK_TABLES).
When committing a statement or a normal transaction, the server either uses the two-phase commit protocol, or issues a commit in each engine independently. The two-phase commit protocol is used only if:
Note that the two phase commit is used for statement transactions, even though they are not durable anyway. This is done to ensure logical consistency of data in a multiple-engine transaction. For example, imagine that some day MySQL supports unique constraint checks deferred till the end of statement. In such case a commit in one of the engines may yield ER_DUP_KEY, and MySQL should be able to gracefully abort statement transactions of other participants.
After the normal transaction has been committed, thd->transaction.all list is cleared.
When a connection is closed, the current normal transaction, if any, is rolled back.
The server has no way to know that an engine participates in the statement and a transaction has been started in it unless the engine says so. Thus, in order to be a part of a transaction, the engine must "register" itself. This is done by invoking trans_register_ha() server call. Normally the engine registers itself whenever handler::external_lock() is called. trans_register_ha() can be invoked many times: if an engine is already registered, the call does nothing. In case autocommit is not set, the engine must register itself twice – both in the statement list and in the normal transaction list. In which list to register is a parameter of trans_register_ha().
Note, that although the registration interface in itself is fairly clear, the current usage practice often leads to undesired effects. E.g. since a call to trans_register_ha() in most engines is embedded into implementation of handler::external_lock(), some DDL statements start a transaction (at least from the server point of view) even though they are not expected to. E.g. CREATE TABLE does not start a transaction, since handler::external_lock() is never called during CREATE TABLE. But CREATE TABLE ... SELECT does, since handler::external_lock() is called for the table that is being selected from. This has no practical effects currently, but must be kept in mind nevertheless.
Once an engine is registered, the server will do the rest of the work.
During statement execution, whenever any of data-modifying PSEA API methods is used, e.g. handler::write_row() or handler::update_row(), the read-write flag is raised in the statement transaction for the involved engine. Currently all PSEA calls are "traced", and the data cannot be changed in a way other than issuing a PSEA call. Important: unless this invariant is preserved the server will not know that a transaction in a given engine is read-write and will not involve the two-phase commit protocol!
At the end of a statement, server call trans_commit_stmt is invoked. This call in turn invokes handlerton::prepare() for every involved engine. Prepare is followed by a call to handlerton::commit_one_phase(). If a one-phase commit will suffice, handlerton::prepare() is not invoked and the server only calls handlerton::commit_one_phase(). At statement commit, the statement-related read-write engine flag is propagated to the corresponding flag in the normal transaction. When the commit is complete, the list of registered engines is cleared.
Rollback is handled in a similar fashion.
DDLs and operations with non-transactional engines do not "register" in thd->transaction lists, and thus do not modify the transaction state. Besides, each DDL in MySQL is prefixed with an implicit normal transaction commit (a call to trans_commit_implicit()), and thus leaves nothing to modify. However, as it has been pointed out with CREATE TABLE .. SELECT, some DDL statements can start a new transaction.
Behaviour of the server in this case is currently badly defined. DDL statements use a form of "semantic" logging to maintain atomicity: if CREATE TABLE .. SELECT failed, the newly created table is deleted. In addition, some DDL statements issue interim transaction commits: e.g. ALTER TABLE issues a commit after data is copied from the original table to the internal temporary table. Other statements, e.g. CREATE TABLE ... SELECT do not always commit after themselves. And finally there is a group of DDL statements such as RENAME/DROP TABLE that doesn't start a new transaction and doesn't commit.
This diversity makes it hard to say what will happen if by chance a stored function is invoked during a DDL – whether any modifications it makes will be committed or not is not clear. Fortunately, SQL grammar of few DDLs allows invocation of a stored function.
A consistent behaviour is perhaps to always commit the normal transaction after all DDLs, just like the statement transaction is always committed at the end of all statements.
Every storage engine MUST call this function when it starts a transaction or a statement (that is it must be called both for the "beginning of transaction" and "beginning of statement"). Only storage engines registered for the transaction/statement will know when to commit/rollback it.
|
static |
Write table maps for all (manually or automatically) locked tables to the binary log.
SYNOPSIS write_locked_table_maps() thd Pointer to THD structure
DESCRIPTION This function will generate and write table maps for all tables that are locked by the thread 'thd'.
RETURN VALUE 0 All OK 1 Failed to write all table maps
SEE ALSO THD::lock
|
static |
Array allowing to check if handlerton is builtin without acquiring LOCK_plugin.
const char* ha_row_type[] |
|
static |
|
static |
ulong savepoint_alloc_size = 0 |
ulong total_ha_2pc = 0 |
const char* tx_isolation_names[] |
TYPELIB tx_isolation_typelib |