MySQL 9.0.0
Source Code Documentation
|
The index tree cursor. More...
#include "btr0cur.h"
#include <assert.h>
#include "my_dbug.h"
#include "string_with_len.h"
#include <zlib.h>
#include "btr0btr.h"
#include "btr0sea.h"
#include "buf0lru.h"
#include "current_thd.h"
#include "debug_sync.h"
#include "ibuf0ibuf.h"
#include "lob0lob.h"
#include "lock0lock.h"
#include "mtr0log.h"
#include "row0upd.h"
#include "page0page.h"
#include "page0zip.h"
#include "que0que.h"
#include "rem0cmp.h"
#include "rem0rec.h"
#include "row0log.h"
#include "row0purge.h"
#include "row0row.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "trx0rec.h"
#include "trx0roll.h"
#include <array>
Macros | |
#define | BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32) |
In the optimistic insert, if the insert does not fit, but this much space can be released by page reorganize, then it is reorganized. More... | |
Enumerations | |
enum | btr_op_t { BTR_NO_OP = 0 , BTR_INSERT_OP , BTR_INSERT_IGNORE_UNIQUE_OP , BTR_DELETE_OP , BTR_DELMARK_OP } |
Buffered B-tree operation types, introduced as part of delete buffering. More... | |
enum | btr_intention_t { BTR_INTENTION_DELETE , BTR_INTENTION_BOTH , BTR_INTENTION_INSERT } |
Modification types for the B-tree operation. More... | |
Functions | |
constexpr uint64_t | BTR_TABLE_STATS_FROM_SAMPLE (uint64_t value, dict_index_t *index, uint64_t sample, ulint ext_size, ulint not_empty) |
Estimated table level stats from sampled value. More... | |
static void | btr_cur_add_path_info (btr_cur_t *cursor, ulint height, ulint root_height) |
Adds path information to the cursor for the current page, for which the binary search has been performed. More... | |
btr_latch_leaves_t | btr_cur_latch_leaves (buf_block_t *block, const page_id_t &page_id, const page_size_t &page_size, ulint latch_mode, btr_cur_t *cursor, mtr_t *mtr) |
Latches the leaf page or pages requested. More... | |
bool | btr_cur_optimistic_latch_leaves (buf_block_t *block, uint64_t modify_clock, ulint *latch_mode, btr_cur_t *cursor, const char *file, ulint line, mtr_t *mtr) |
Optimistically latches the leaf page or pages requested. More... | |
static btr_intention_t | btr_cur_get_and_clear_intention (ulint *latch_mode) |
Gets intention in btr_intention_t from latch_mode, and cleares the intention at the latch_mode. More... | |
static rw_lock_type_t | btr_cur_latch_for_root_leaf (ulint latch_mode) |
Gets the desired latch type for the root leaf (root page is root leaf) at the latch mode. More... | |
static bool | btr_cur_will_modify_tree (dict_index_t *index, const page_t *page, btr_intention_t lock_intention, const rec_t *rec, ulint rec_size, const page_size_t &page_size, mtr_t *mtr) |
Detects whether the modifying record might need a modifying tree structure. More... | |
static bool | btr_cur_need_opposite_intention (const page_t *page, btr_intention_t lock_intention, const rec_t *rec) |
Detects whether the modifying record might need a opposite modification to the intention. More... | |
void | btr_cur_search_to_nth_level (dict_index_t *index, ulint level, const dtuple_t *tuple, page_cur_mode_t mode, ulint latch_mode, btr_cur_t *cursor, ulint has_search_latch, const char *file, ulint line, mtr_t *mtr) |
Searches an index tree and positions a tree cursor on a given level. More... | |
void | btr_cur_search_to_nth_level_with_no_latch (dict_index_t *index, ulint level, const dtuple_t *tuple, page_cur_mode_t mode, btr_cur_t *cursor, const char *file, ulint line, mtr_t *mtr, bool mark_dirty) |
Searches an index tree and positions a tree cursor on a given level. More... | |
void | btr_cur_open_at_index_side (bool from_left, dict_index_t *index, ulint latch_mode, btr_cur_t *cursor, ulint level, ut::Location location, mtr_t *mtr) |
Opens a cursor at either end of an index. More... | |
void | btr_cur_open_at_index_side_with_no_latch (bool from_left, dict_index_t *index, btr_cur_t *cursor, ulint level, ut::Location location, mtr_t *mtr) |
Opens a cursor at either end of an index. More... | |
bool | btr_cur_open_at_rnd_pos (dict_index_t *index, ulint latch_mode, btr_cur_t *cursor, const char *file, ulint line, mtr_t *mtr) |
Positions a cursor at a randomly chosen position within a B-tree. More... | |
static rec_t * | btr_cur_insert_if_possible (btr_cur_t *cursor, const dtuple_t *tuple, ulint **offsets, mem_heap_t **heap, mtr_t *mtr) |
Inserts a record if there is enough space, or if enough space can be freed by reorganizing. More... | |
static dberr_t | btr_cur_ins_lock_and_undo (ulint flags, btr_cur_t *cursor, dtuple_t *entry, que_thr_t *thr, mtr_t *mtr, bool *inherit) |
For an insert, checks the locks and does the undo logging if desired. More... | |
static void | btr_cur_prefetch_siblings (buf_block_t *block) |
Prefetch siblings of the leaf for the pessimistic operation. More... | |
dberr_t | btr_cur_optimistic_insert (ulint flags, btr_cur_t *cursor, ulint **offsets, mem_heap_t **heap, dtuple_t *entry, rec_t **rec, big_rec_t **big_rec, que_thr_t *thr, mtr_t *mtr) |
Tries to perform an insert to a page in an index tree, next to cursor. More... | |
dberr_t | btr_cur_pessimistic_insert (uint32_t flags, btr_cur_t *cursor, ulint **offsets, mem_heap_t **heap, dtuple_t *entry, rec_t **rec, big_rec_t **big_rec, que_thr_t *thr, mtr_t *mtr) |
Performs an insert on a page of an index tree. More... | |
static dberr_t | btr_cur_upd_lock_and_undo (ulint flags, btr_cur_t *cursor, const ulint *offsets, const upd_t *update, ulint cmpl_info, que_thr_t *thr, mtr_t *mtr, roll_ptr_t *roll_ptr) |
For an update, checks the locks and does the undo logging. More... | |
void | btr_cur_update_in_place_log (ulint flags, const rec_t *rec, dict_index_t *index, const upd_t *update, trx_id_t trx_id, roll_ptr_t roll_ptr, mtr_t *mtr) |
Writes a redo log record of updating a record in-place. More... | |
const byte * | btr_cur_parse_update_in_place (const byte *ptr, const byte *end_ptr, page_t *page, page_zip_des_t *page_zip, dict_index_t *index) |
Parses a redo log record of updating a record in-place. More... | |
bool | btr_cur_update_alloc_zip_func (page_zip_des_t *page_zip, page_cur_t *cursor, dict_index_t *index, ulint *offsets, ulint length, bool create, mtr_t *mtr) |
See if there is enough place in the page modification log to log an update-in-place. More... | |
dberr_t | btr_cur_update_in_place (ulint flags, btr_cur_t *cursor, ulint *offsets, const upd_t *update, ulint cmpl_info, que_thr_t *thr, trx_id_t trx_id, mtr_t *mtr) |
Updates a record when the update causes no size changes in its fields. More... | |
bool | materialize_instant_default (const dict_index_t *index, const rec_t *rec) |
If default value of INSTANT ADD column is to be materialize in updated row. More... | |
dberr_t | btr_cur_optimistic_update (ulint flags, btr_cur_t *cursor, ulint **offsets, mem_heap_t **heap, const upd_t *update, ulint cmpl_info, que_thr_t *thr, trx_id_t trx_id, mtr_t *mtr) |
Tries to update a record on a page in an index tree. More... | |
static void | btr_cur_pess_upd_restore_supremum (buf_block_t *block, const rec_t *rec, mtr_t *mtr) |
If, in a split, a new supremum record was created as the predecessor of the updated record, the supremum record must inherit exactly the locks on the updated record. More... | |
dberr_t | btr_cur_pessimistic_update (ulint flags, btr_cur_t *cursor, ulint **offsets, mem_heap_t **offsets_heap, mem_heap_t *entry_heap, big_rec_t **big_rec, upd_t *update, ulint cmpl_info, que_thr_t *thr, trx_id_t trx_id, undo_no_t undo_no, mtr_t *mtr, btr_pcur_t *pcur) |
Performs an update of a record on a page of a tree. More... | |
static void | btr_cur_del_mark_set_clust_rec_log (rec_t *rec, dict_index_t *index, trx_id_t trx_id, roll_ptr_t roll_ptr, mtr_t *mtr) |
Writes the redo log record for delete marking or unmarking of an index record. More... | |
const byte * | btr_cur_parse_del_mark_set_clust_rec (const byte *ptr, const byte *end_ptr, page_t *page, page_zip_des_t *page_zip, dict_index_t *index) |
Parses the redo log record for delete marking or unmarking of a clustered index record. More... | |
dberr_t | btr_cur_del_mark_set_clust_rec (ulint flags, buf_block_t *block, rec_t *rec, dict_index_t *index, const ulint *offsets, que_thr_t *thr, const dtuple_t *entry, mtr_t *mtr) |
Marks a clustered index record deleted. More... | |
static void | btr_cur_del_mark_set_sec_rec_log (rec_t *rec, bool val, mtr_t *mtr) |
Writes the redo log record for a delete mark setting of a secondary index record. More... | |
const byte * | btr_cur_parse_del_mark_set_sec_rec (const byte *ptr, const byte *end_ptr, page_t *page, page_zip_des_t *page_zip) |
Parses the redo log record for delete marking or unmarking of a secondary index record. More... | |
dberr_t | btr_cur_del_mark_set_sec_rec (ulint flags, btr_cur_t *cursor, bool val, que_thr_t *thr, mtr_t *mtr) |
Sets a secondary index record delete mark to true or false. More... | |
void | btr_cur_set_deleted_flag_for_ibuf (rec_t *rec, page_zip_des_t *page_zip, bool val, mtr_t *mtr) |
Sets a secondary index record's delete mark to the given value. More... | |
bool | btr_cur_compress_if_useful (btr_cur_t *cursor, bool adjust, mtr_t *mtr) |
Tries to compress a page of the tree if it seems useful. More... | |
bool | btr_cur_optimistic_delete_func (btr_cur_t *cursor, ulint flags, mtr_t *mtr) |
bool | btr_cur_pessimistic_delete (dberr_t *err, bool has_reserved_extents, btr_cur_t *cursor, uint32_t flags, bool rollback, trx_id_t trx_id, undo_no_t undo_no, ulint rec_type, mtr_t *mtr, btr_pcur_t *pcur, purge_node_t *node) |
Removes the record on which the tree cursor is positioned. More... | |
static int64_t | btr_estimate_n_rows_in_range_on_level (dict_index_t *index, btr_path_t *slot1, btr_path_t *slot2, int64_t n_rows_on_prev_level, bool *is_n_rows_exact) |
Estimate the number of rows between slot1 and slot2 for any level on a B-tree. More... | |
static int64_t | btr_estimate_n_rows_in_range_low (dict_index_t *index, const dtuple_t *tuple1, page_cur_mode_t mode1, const dtuple_t *tuple2, page_cur_mode_t mode2, unsigned nth_attempt) |
Estimates the number of rows in a given index range. More... | |
int64_t | btr_estimate_n_rows_in_range (dict_index_t *index, const dtuple_t *tuple1, page_cur_mode_t mode1, const dtuple_t *tuple2, page_cur_mode_t mode2) |
Estimates the number of rows in a given index range. More... | |
static void | btr_record_not_null_field_in_rec (const dict_index_t *index, ulint n_unique, const ulint *offsets, uint64_t *n_not_null) |
Record the number of non_null key values in a given index for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). More... | |
bool | btr_estimate_number_of_different_key_vals (dict_index_t *index) |
Estimates the number of different key values in a given index, for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). More... | |
Variables | |
constexpr uint32_t | BTR_CUR_FINE_HISTORY_LENGTH = 100000 |
For the index->lock scalability improvement, only possibility of clear performance regression observed was caused by grown huge history list length. More... | |
ulint | btr_cur_n_non_sea = 0 |
Number of searches down the B-tree in btr_cur_search_to_nth_level(). More... | |
ulint | btr_cur_n_sea = 0 |
Number of successful adaptive hash index lookups in btr_cur_search_to_nth_level(). More... | |
ulint | btr_cur_n_non_sea_old = 0 |
Old value of btr_cur_n_non_sea. More... | |
ulint | btr_cur_n_sea_old = 0 |
Old value of btr_cur_n_sea. More... | |
uint | btr_cur_limit_optimistic_insert_debug = 0 |
static const unsigned | rows_in_range_max_retries = 4 |
If the tree gets changed too much between the two dives for the left and right boundary then btr_estimate_n_rows_in_range_low() will retry that many times before giving up and returning the value stored in rows_in_range_arbitrary_ret_val. More... | |
static const int64_t | rows_in_range_arbitrary_ret_val = 10 |
We pretend that a range has that many records if the tree keeps changing for rows_in_range_max_retries retries while we try to estimate the records in a given range. More... | |
The index tree cursor.
All changes that row operations make to a B-tree or the records there must go through this module! Undo log records are written here of every modify or insert of a clustered index record.
NOTE!!!
To make sure we do not run out of disk space during a pessimistic insert or update, we have to reserve 2 x the height of the index tree many pages in the tablespace before we start the operation, because if leaf splitting has been started, it is difficult to undo, except by crashing the database and doing a roll-forward.
Created 10/16/1994 Heikki Tuuri
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32) |
In the optimistic insert, if the insert does not fit, but this much space can be released by page reorganize, then it is reorganized.
enum btr_intention_t |
enum btr_op_t |
Adds path information to the cursor for the current page, for which the binary search has been performed.
[in,out] | cursor | Cursor positioned on a page. |
[in] | height | Height of the page in the tree; 0 means leaf. |
[in] | root_height | Root node height in true. |
Tries to compress a page of the tree if it seems useful.
It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. NOTE: it is assumed that the caller has reserved enough free extents so that the compression will always succeed if done!
cursor | in/out: cursor on the page to compress; cursor does not stay valid if !adjust and compression occurs |
adjust | in: true if should adjust the cursor position even if compression occurs |
mtr | in/out: mini-transaction |
dberr_t btr_cur_del_mark_set_clust_rec | ( | ulint | flags, |
buf_block_t * | block, | ||
rec_t * | rec, | ||
dict_index_t * | index, | ||
const ulint * | offsets, | ||
que_thr_t * | thr, | ||
const dtuple_t * | entry, | ||
mtr_t * | mtr | ||
) |
Marks a clustered index record deleted.
Writes an undo log record to undo log on this delete marking. Writes in the trx id field the id of the deleting transaction, and in the roll ptr field pointer to the undo log record created.
flags | in: undo logging and locking flags |
block | in/out: buffer block of the record |
rec | in/out: record |
index | in: clustered index of the record |
offsets | in: rec_get_offsets(rec) |
thr | in: query thread |
entry | in: dtuple for the deleting record, also contains the virtual cols if there are any |
mtr | in/out: mini-transaction |
|
inlinestatic |
Writes the redo log record for delete marking or unmarking of an index record.
rec | in: record |
index | in: index of the record |
trx_id | in: transaction id |
roll_ptr | in: roll ptr to the undo log record |
mtr | in: mtr |
dberr_t btr_cur_del_mark_set_sec_rec | ( | ulint | flags, |
btr_cur_t * | cursor, | ||
bool | val, | ||
que_thr_t * | thr, | ||
mtr_t * | mtr | ||
) |
Sets a secondary index record delete mark to true or false.
flags | in: locking flag |
cursor | in: cursor |
val | in: value to set |
thr | in: query thread |
mtr | in/out: mini-transaction |
Writes the redo log record for a delete mark setting of a secondary index record.
rec | in: record |
val | in: value to set |
mtr | in: mtr |
|
static |
Gets intention in btr_intention_t from latch_mode, and cleares the intention at the latch_mode.
latch_mode | in/out: pointer to latch_mode |
|
inlinestatic |
For an insert, checks the locks and does the undo logging if desired.
flags | in: undo logging and locking flags: if not zero, the parameters index and thr should be specified |
cursor | in: cursor on page after which to insert |
entry | in/out: entry to insert |
thr | in: query thread or NULL |
mtr | in/out: mini-transaction |
inherit | out: true if the inserted new record maybe should inherit LOCK_GAP type locks from the successor record |
|
static |
Inserts a record if there is enough space, or if enough space can be freed by reorganizing.
Differs from btr_cur_optimistic_insert because no heuristics is applied to whether it pays to use CPU time for reorganizing the page or not.
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit().
cursor | in: cursor on page after which to insert; cursor stays valid |
tuple | in: tuple to insert; the size info need not have been stored to tuple |
offsets | out: offsets on *rec |
heap | in/out: pointer to memory heap, or NULL |
mtr | in/out: mini-transaction |
|
static |
Gets the desired latch type for the root leaf (root page is root leaf) at the latch mode.
latch_mode | in: BTR_SEARCH_LEAF, ... |
btr_latch_leaves_t btr_cur_latch_leaves | ( | buf_block_t * | block, |
const page_id_t & | page_id, | ||
const page_size_t & | page_size, | ||
ulint | latch_mode, | ||
btr_cur_t * | cursor, | ||
mtr_t * | mtr | ||
) |
Latches the leaf page or pages requested.
[in] | block | Leaf page where the search converged |
[in] | page_id | Page id of the leaf |
[in] | page_size | Page size |
[in] | latch_mode | BTR_SEARCH_LEAF, ... |
[in] | cursor | Cursor |
[in] | mtr | Mini-transaction |
|
static |
Detects whether the modifying record might need a opposite modification to the intention.
[in] | page | page |
[in] | lock_intention | lock intention for the tree operation |
[in] | rec | record (current node_ptr) |
void btr_cur_open_at_index_side | ( | bool | from_left, |
dict_index_t * | index, | ||
ulint | latch_mode, | ||
btr_cur_t * | cursor, | ||
ulint | level, | ||
ut::Location | location, | ||
mtr_t * | mtr | ||
) |
Opens a cursor at either end of an index.
[in] | from_left | True if open to the low end, false if to the high end |
[in] | index | Index |
[in] | latch_mode | Latch mode |
[in,out] | cursor | Cursor |
[in] | level | Level to search for (0=leaf) |
[in] | location | Location where called |
[in,out] | mtr | Mini-transaction |
void btr_cur_open_at_index_side_with_no_latch | ( | bool | from_left, |
dict_index_t * | index, | ||
btr_cur_t * | cursor, | ||
ulint | level, | ||
ut::Location | location, | ||
mtr_t * | mtr | ||
) |
Opens a cursor at either end of an index.
Avoid taking latches on buffer, just pin (by incrementing fix_count) to keep them in buffer pool. This mode is used by intrinsic table as they are not shared and so there is no need of latching.
[in] | from_left | true if open to low end, false if open to high end. |
[in] | index | Index |
[in,out] | cursor | Cursor |
[in] | level | Level to search for (0=leaf) |
[in] | location | Location where called |
[in,out] | mtr | Mini-transaction |
bool btr_cur_open_at_rnd_pos | ( | dict_index_t * | index, |
ulint | latch_mode, | ||
btr_cur_t * | cursor, | ||
const char * | file, | ||
ulint | line, | ||
mtr_t * | mtr | ||
) |
Positions a cursor at a randomly chosen position within a B-tree.
index | in: index |
latch_mode | in: BTR_SEARCH_LEAF, ... |
cursor | in/out: B-tree cursor |
file | in: file name |
line | in: line where called |
mtr | in: mtr |
dberr_t btr_cur_optimistic_insert | ( | ulint | flags, |
btr_cur_t * | cursor, | ||
ulint ** | offsets, | ||
mem_heap_t ** | heap, | ||
dtuple_t * | entry, | ||
rec_t ** | rec, | ||
big_rec_t ** | big_rec, | ||
que_thr_t * | thr, | ||
mtr_t * | mtr | ||
) |
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does not succeed if there is too little space on the page. If there is just one record on the page, the insert will always succeed; this is to prevent trying to split a page with just one record.
flags | in: undo logging and locking flags: if not zero, the parameters index and thr should be specified |
cursor | in: cursor on page after which to insert; cursor stays valid |
offsets | out: offsets on *rec |
heap | in/out: pointer to memory heap, or NULL |
entry | in/out: entry to insert |
rec | out: pointer to inserted record if succeed |
big_rec | out: big rec vector whose fields have to be stored externally by the caller, or NULL |
thr | in: query thread or NULL |
mtr | in/out: mini-transaction; if this function returns DB_SUCCESS on a leaf page of a secondary index in a compressed tablespace, the caller must mtr_commit(mtr) before latching any further pages |
bool btr_cur_optimistic_latch_leaves | ( | buf_block_t * | block, |
uint64_t | modify_clock, | ||
ulint * | latch_mode, | ||
btr_cur_t * | cursor, | ||
const char * | file, | ||
ulint | line, | ||
mtr_t * | mtr | ||
) |
Optimistically latches the leaf page or pages requested.
[in] | block | Guessed buffer block |
[in] | modify_clock | Modify clock value |
[in,out] | latch_mode | BTR_SEARCH_LEAF, ... |
[in,out] | cursor | Cursor |
[in] | file | File name |
[in] | line | Line where called |
[in] | mtr | Mini-transaction |
dberr_t btr_cur_optimistic_update | ( | ulint | flags, |
btr_cur_t * | cursor, | ||
ulint ** | offsets, | ||
mem_heap_t ** | heap, | ||
const upd_t * | update, | ||
ulint | cmpl_info, | ||
que_thr_t * | thr, | ||
trx_id_t | trx_id, | ||
mtr_t * | mtr | ||
) |
Tries to update a record on a page in an index tree.
It is assumed that mtr holds an x-latch on the page. The operation does not succeed if there is too little space on the page or if the update would result in too empty a page, so that tree compression is recommended. We assume here that the ordering fields of the record do not change.
[in] | flags | undo logging and locking flags |
[in] | cursor | cursor on the record to update; cursor stays valid and positioned on the same record |
[out] | offsets | offsets on cursor->page_cur.rec |
[in,out] | heap | pointer to nullptr or memory heap |
[in] | update | update vector; this must also contain trx id and roll ptr fields |
[in] | cmpl_info | compiler info on secondary index updates |
[in] | thr | query thread, or nullptr if flags & (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG) |
[in] | trx_id | transaction id |
[in,out] | mtr | mini-transaction; if this is a secondary index, the caller must mtr_commit(mtr) before latching any further pages |
DB_SUCCESS | on success |
DB_OVERFLOW | if the updated record does not fit |
DB_UNDERFLOW | if the page would become too empty |
DB_ZIP_OVERFLOW | if there is not enough space left on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) |
const byte * btr_cur_parse_del_mark_set_clust_rec | ( | const byte * | ptr, |
const byte * | end_ptr, | ||
page_t * | page, | ||
page_zip_des_t * | page_zip, | ||
dict_index_t * | index | ||
) |
Parses the redo log record for delete marking or unmarking of a clustered index record.
ptr | in: buffer |
end_ptr | in: buffer end |
page | in/out: page or NULL |
page_zip | in/out: compressed page, or NULL |
index | in: index corresponding to page |
const byte * btr_cur_parse_del_mark_set_sec_rec | ( | const byte * | ptr, |
const byte * | end_ptr, | ||
page_t * | page, | ||
page_zip_des_t * | page_zip | ||
) |
Parses the redo log record for delete marking or unmarking of a secondary index record.
ptr | in: buffer |
end_ptr | in: buffer end |
page | in/out: page or NULL |
page_zip | in/out: compressed page, or NULL |
const byte * btr_cur_parse_update_in_place | ( | const byte * | ptr, |
const byte * | end_ptr, | ||
page_t * | page, | ||
page_zip_des_t * | page_zip, | ||
dict_index_t * | index | ||
) |
Parses a redo log record of updating a record in-place.
ptr | in: buffer |
end_ptr | in: buffer end |
page | in/out: page or NULL |
page_zip | in/out: compressed page, or NULL |
index | in: index corresponding to page |
|
static |
If, in a split, a new supremum record was created as the predecessor of the updated record, the supremum record must inherit exactly the locks on the updated record.
In the split it may have inherited locks from the successor of the updated record, which is not correct. This function restores the right locks for the new supremum.
block | in: buffer block of rec |
rec | in: updated record |
mtr | in: mtr |
bool btr_cur_pessimistic_delete | ( | dberr_t * | err, |
bool | has_reserved_extents, | ||
btr_cur_t * | cursor, | ||
uint32_t | flags, | ||
bool | rollback, | ||
trx_id_t | trx_id, | ||
undo_no_t | undo_no, | ||
ulint | rec_type, | ||
mtr_t * | mtr, | ||
btr_pcur_t * | pcur, | ||
purge_node_t * | node | ||
) |
Removes the record on which the tree cursor is positioned.
Tries to compress the page if its fillfactor drops below a threshold or if it is the only page on the level. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist.
[out] | err | DB_SUCCESS or DB_OUT_OF_FILE_SPACE; the latter may occur because we may have to update node pointers on upper levels, and in the case of variable length keys these may actually grow in size |
[in] | has_reserved_extents | true if the caller has already reserved enough free extents so that he knows that the operation will succeed |
[in] | cursor | Cursor on the record to delete; if compression does not occur, the cursor stays valid: it points to successor of deleted record on function exit |
[in] | flags | BTR_CREATE_FLAG or 0 |
[in] | rollback | True if performing rollback, false otherwise. |
[in] | trx_id | The current transaction id. |
[in] | undo_no | Undo number of the transaction. This is needed for rollback to savepoint of partially updated LOB. |
[in] | rec_type | Undo record type. |
[in] | mtr | The mini transaction |
[in] | pcur | Persistent cursor on the record to delete. |
[in,out] | node | purge node or nullptr |
dberr_t btr_cur_pessimistic_insert | ( | uint32_t | flags, |
btr_cur_t * | cursor, | ||
ulint ** | offsets, | ||
mem_heap_t ** | heap, | ||
dtuple_t * | entry, | ||
rec_t ** | rec, | ||
big_rec_t ** | big_rec, | ||
que_thr_t * | thr, | ||
mtr_t * | mtr | ||
) |
Performs an insert on a page of an index tree.
It is assumed that mtr holds an x-latch on the tree and on the cursor page. If the insert is made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist.
flags | in: undo logging and locking flags: if not zero, the parameter thr should be specified; if no undo logging is specified, then the caller must have reserved enough free extents in the file space so that the insertion will certainly succeed |
cursor | in: cursor after which to insert; cursor stays valid |
offsets | out: offsets on *rec |
heap | in/out: pointer to memory heap that can be emptied, or NULL |
entry | in/out: entry to insert |
rec | out: pointer to inserted record if succeed |
big_rec | out: big rec vector whose fields have to be stored externally by the caller, or NULL |
thr | in: query thread or NULL |
mtr | in/out: mini-transaction |
dberr_t btr_cur_pessimistic_update | ( | ulint | flags, |
btr_cur_t * | cursor, | ||
ulint ** | offsets, | ||
mem_heap_t ** | offsets_heap, | ||
mem_heap_t * | entry_heap, | ||
big_rec_t ** | big_rec, | ||
upd_t * | update, | ||
ulint | cmpl_info, | ||
que_thr_t * | thr, | ||
trx_id_t | trx_id, | ||
undo_no_t | undo_no, | ||
mtr_t * | mtr, | ||
btr_pcur_t * | pcur = nullptr |
||
) |
Performs an update of a record on a page of a tree.
It is assumed that mtr holds an x-latch on the tree and on the cursor page. If the update is made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist.
[in] | flags | Undo logging, locking, and rollback flags |
[in,out] | cursor | cursor on the record to update; cursor may become invalid if *big_rec == NULL || !(flags & BTR_KEEP_POS_FLAG) |
[out] | offsets | Offsets on cursor->page_cur.rec |
[in,out] | offsets_heap | Pointer to memory heap that can be emptied, or NULL |
[in,out] | entry_heap | Memory heap for allocating big_rec and the index tuple. |
[out] | big_rec | Big rec vector whose fields have to be stored externally by the caller, or NULL |
[in,out] | update | Update vector; this is allowed to also contain trx id and roll ptr fields. Non-updated columns that are moved offpage will be appended to this. |
[in] | cmpl_info | Compiler info on secondary index updates |
[in] | thr | Query thread, or NULL if flags & (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG) |
[in] | trx_id | Transaction id |
[in] | undo_no | Undo number of the transaction. This is needed for rollback to savepoint of partially updated LOB. |
[in,out] | mtr | Mini-transaction; must be committed before latching any further pages |
[in] | pcur | The persistent cursor on the record to update. |
|
static |
Prefetch siblings of the leaf for the pessimistic operation.
block | leaf page |
void btr_cur_search_to_nth_level | ( | dict_index_t * | index, |
ulint | level, | ||
const dtuple_t * | tuple, | ||
page_cur_mode_t | mode, | ||
ulint | latch_mode, | ||
btr_cur_t * | cursor, | ||
ulint | has_search_latch, | ||
const char * | file, | ||
ulint | line, | ||
mtr_t * | mtr | ||
) |
Searches an index tree and positions a tree cursor on a given level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared to node pointer page number fields on the upper levels of the tree! Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the search tuple should be performed in the B-tree. InnoDB does an insert immediately after the cursor. Thus, the cursor may end up on a user record, or on a page infimum record.
index | in: index |
level | in: the tree level of search |
tuple | in: data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! |
mode | in: PAGE_CUR_L, ...; Inserts should always be made using PAGE_CUR_LE to search the position! |
latch_mode | in: BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, BTR_DELETE_MARK, BTR_DELETE, or BTR_ESTIMATE; cursor->left_block is used to store a pointer to the left neighbor page, in the cases BTR_SEARCH_PREV and BTR_MODIFY_PREV; NOTE that if has_search_latch is != 0, we maybe do not have a latch set on the cursor page, we assume the caller uses his search latch to protect the record! |
cursor | in/out: tree cursor; the cursor page is s- or x-latched, but see also above! |
has_search_latch | in: info on the latch mode the caller currently has on search system: RW_S_LATCH, or 0 |
file | in: file name |
line | in: line where called |
mtr | in: mtr |
void btr_cur_search_to_nth_level_with_no_latch | ( | dict_index_t * | index, |
ulint | level, | ||
const dtuple_t * | tuple, | ||
page_cur_mode_t | mode, | ||
btr_cur_t * | cursor, | ||
const char * | file, | ||
ulint | line, | ||
mtr_t * | mtr, | ||
bool | mark_dirty | ||
) |
Searches an index tree and positions a tree cursor on a given level.
This function will avoid placing latches while traversing the path and so should be used only for cases where-in latching is not needed.
[in] | index | Index |
[in] | level | The tree level of search |
[in] | tuple | Data tuple; Note: n_fields_cmp in compared to the node ptr page node field |
[in] | mode | PAGE_CUR_L, .... Insert should always be made using PAGE_CUR_LE to search the position. |
[in,out] | cursor | Tree cursor; points to record of interest. |
[in] | file | File name |
[in] | line | Line where called from |
[in,out] | mtr | Mini-transaction |
[in] | mark_dirty | if true then mark the block as dirty |
void btr_cur_set_deleted_flag_for_ibuf | ( | rec_t * | rec, |
page_zip_des_t * | page_zip, | ||
bool | val, | ||
mtr_t * | mtr | ||
) |
Sets a secondary index record's delete mark to the given value.
This function is only used by the insert buffer merge mechanism.
rec | in/out: record |
page_zip | in/out: compressed page corresponding to rec, or NULL when the tablespace is uncompressed |
val | in: value to set |
mtr | in/out: mini-transaction |
|
inlinestatic |
For an update, checks the locks and does the undo logging.
flags | in: undo logging and locking flags |
cursor | in: cursor on record to update |
offsets | in: rec_get_offsets() on cursor |
update | in: update vector |
cmpl_info | in: compiler info on secondary index updates |
thr | in: query thread (can be NULL if BTR_NO_LOCKING_FLAG) |
mtr | in/out: mini-transaction |
roll_ptr | out: roll pointer |
bool btr_cur_update_alloc_zip_func | ( | page_zip_des_t * | page_zip, |
page_cur_t * | cursor, | ||
dict_index_t * | index, | ||
ulint * | offsets, | ||
ulint | length, | ||
bool | create, | ||
mtr_t * | mtr | ||
) |
See if there is enough place in the page modification log to log an update-in-place.
[in,out] | page_zip | Compressed page. |
[in,out] | cursor | B-tree page cursor. |
[in] | index | The index corresponding to cursor. |
[in,out] | offsets | Offsets of the cursor record. |
[in] | length | size needed |
[in] | create | true=delete-and-insert, false=update-in-place |
[in,out] | mtr | Mini-transaction. |
false | if out of space; IBUF_BITMAP_FREE will be reset outside mtr if the page was re-compressed |
true | if enough place; |
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is a secondary index leaf page. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(mtr).
dberr_t btr_cur_update_in_place | ( | ulint | flags, |
btr_cur_t * | cursor, | ||
ulint * | offsets, | ||
const upd_t * | update, | ||
ulint | cmpl_info, | ||
que_thr_t * | thr, | ||
trx_id_t | trx_id, | ||
mtr_t * | mtr | ||
) |
Updates a record when the update causes no size changes in its fields.
[in] | flags | Undo logging and locking flags |
[in] | cursor | Cursor on the record to update; cursor stays valid and positioned on the same record |
[in,out] | offsets | Offsets on cursor->page_cur.rec |
[in] | update | Update vector |
[in] | cmpl_info | Compiler info on secondary index updates |
[in] | thr | Query thread, or null if flags & (btr_no_locking_flag | btr_no_undo_log_flag | btr_create_flag | btr_keep_sys_flag) |
[in] | trx_id | Transaction id |
[in,out] | mtr | Mini-transaction; if this is a secondary index, the caller must mtr_commit(mtr) before latching any further pages |
DB_SUCCESS | on success |
DB_ZIP_OVERFLOW | if there is not enough space left on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) |
void btr_cur_update_in_place_log | ( | ulint | flags, |
const rec_t * | rec, | ||
dict_index_t * | index, | ||
const upd_t * | update, | ||
trx_id_t | trx_id, | ||
roll_ptr_t | roll_ptr, | ||
mtr_t * | mtr | ||
) |
Writes a redo log record of updating a record in-place.
[in] | flags | Undo logging and locking flags |
[in] | rec | Record |
[in] | index | Index of the record |
[in] | update | Update vector |
[in] | trx_id | Transaction id |
[in] | roll_ptr | Roll ptr |
[in] | mtr | Mini-transaction |
|
static |
Detects whether the modifying record might need a modifying tree structure.
[in] | index | index |
[in] | page | page |
[in] | lock_intention | lock intention for the tree operation |
[in] | rec | record (current node_ptr) |
[in] | rec_size | size of the record or max size of node_ptr |
[in] | page_size | page size |
[in] | mtr | mtr |
int64_t btr_estimate_n_rows_in_range | ( | dict_index_t * | index, |
const dtuple_t * | tuple1, | ||
page_cur_mode_t | mode1, | ||
const dtuple_t * | tuple2, | ||
page_cur_mode_t | mode2 | ||
) |
Estimates the number of rows in a given index range.
[in] | index | index |
[in] | tuple1 | range start, may also be empty tuple |
[in] | mode1 | search mode for range start |
[in] | tuple2 | range end, may also be empty tuple |
[in] | mode2 | search mode for range end |
|
static |
Estimates the number of rows in a given index range.
[in] | index | index |
[in] | tuple1 | range start, may also be empty tuple |
[in] | mode1 | search mode for range start |
[in] | tuple2 | range end, may also be empty tuple |
[in] | mode2 | search mode for range end |
[in] | nth_attempt | if the tree gets modified too much while we are trying to analyze it, then we will retry (this function will call itself, incrementing this parameter) |
|
static |
Estimate the number of rows between slot1 and slot2 for any level on a B-tree.
This function starts from slot1->page and reads a few pages to the right, counting their records. If we reach slot2->page quickly then we know exactly how many records there are between slot1 and slot2 and we set is_n_rows_exact to true. If we cannot reach slot2->page quickly then we calculate the average number of records in the pages scanned so far and assume that all pages that we did not scan up to slot2->page contain the same number of records, then we multiply that average to the number of pages between slot1->page and slot2->page (which is n_rows_on_prev_level). In this case we set is_n_rows_exact to false.
index | in: index |
slot1 | in: left border |
slot2 | in: right border |
n_rows_on_prev_level | in: number of rows on the previous level for the same descend paths; used to determine the number of pages on this level |
is_n_rows_exact | out: true if the returned value is exact i.e. not an estimation |
bool btr_estimate_number_of_different_key_vals | ( | dict_index_t * | index | ) |
Estimates the number of different key values in a given index, for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed 0..n_uniq-1) and the number of pages that were sampled is saved in index->stat_n_sample_sizes[]. If innodb_stats_method is nulls_ignored, we also record the number of non-null values for each prefix and stored the estimates in array index->stat_n_non_null_key_vals.
index | in: index |
|
static |
Record the number of non_null key values in a given index for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
The estimates are eventually stored in the array: index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1.
[in] | index | index |
[in] | n_unique | dict_index_get_n_unique(index), number of columns uniquely determine an index entry |
[in] | offsets | rec_get_offsets(rec, index), its size could be for all fields or that of "n_unique" |
[in,out] | n_not_null | array to record number of not null rows for n-column prefix |
|
constexpr |
Estimated table level stats from sampled value.
value | sampled stats |
index | index being sampled |
sample | number of sampled rows |
ext_size | external stored data size |
not_empty | table not empty |
bool materialize_instant_default | ( | const dict_index_t * | index, |
const rec_t * | rec | ||
) |
If default value of INSTANT ADD column is to be materialize in updated row.
[in] | index | record descriptor |
[in] | rec | record |
|
constexpr |
For the index->lock scalability improvement, only possibility of clear performance regression observed was caused by grown huge history list length.
That is because the exclusive use of index->lock also worked as reserving free blocks and read IO bandwidth with priority. To avoid huge glowing history list as same level with previous implementation, prioritizes pessimistic tree operations by purge as the previous, when it seems to be growing huge.
Experimentally, the history list length starts to affect to performance throughput clearly from about 100000.
uint btr_cur_limit_optimistic_insert_debug = 0 |
ulint btr_cur_n_non_sea = 0 |
Number of searches down the B-tree in btr_cur_search_to_nth_level().
ulint btr_cur_n_non_sea_old = 0 |
Old value of btr_cur_n_non_sea.
Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor().
ulint btr_cur_n_sea = 0 |
Number of successful adaptive hash index lookups in btr_cur_search_to_nth_level().
ulint btr_cur_n_sea_old = 0 |
Old value of btr_cur_n_sea.
Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor().
|
static |
We pretend that a range has that many records if the tree keeps changing for rows_in_range_max_retries retries while we try to estimate the records in a given range.
|
static |
If the tree gets changed too much between the two dives for the left and right boundary then btr_estimate_n_rows_in_range_low() will retry that many times before giving up and returning the value stored in rows_in_range_arbitrary_ret_val.