MySQL 8.0.40
Source Code Documentation
btr0cur.cc File Reference

The index tree cursor. More...

#include "btr0cur.h"
#include <assert.h>
#include "my_dbug.h"
#include <zlib.h>
#include "btr0btr.h"
#include "btr0sea.h"
#include "buf0lru.h"
#include "current_thd.h"
#include "debug_sync.h"
#include "ibuf0ibuf.h"
#include "lob0lob.h"
#include "lock0lock.h"
#include "mtr0log.h"
#include "row0upd.h"
#include "page0page.h"
#include "page0zip.h"
#include "que0que.h"
#include "rem0cmp.h"
#include "rem0rec.h"
#include "row0log.h"
#include "row0purge.h"
#include "row0row.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "trx0rec.h"
#include "trx0roll.h"
#include <array>

Macros

#define BTR_CUR_PAGE_REORGANIZE_LIMIT   (UNIV_PAGE_SIZE / 32)
 In the optimistic insert, if the insert does not fit, but this much space can be released by page reorganize, then it is reorganized. More...
 

Enumerations

enum  btr_op_t {
  BTR_NO_OP = 0 , BTR_INSERT_OP , BTR_INSERT_IGNORE_UNIQUE_OP , BTR_DELETE_OP ,
  BTR_DELMARK_OP
}
 Buffered B-tree operation types, introduced as part of delete buffering. More...
 
enum  btr_intention_t { BTR_INTENTION_DELETE , BTR_INTENTION_BOTH , BTR_INTENTION_INSERT }
 Modification types for the B-tree operation. More...
 

Functions

constexpr uint64_t BTR_TABLE_STATS_FROM_SAMPLE (uint64_t value, dict_index_t *index, uint64_t sample, ulint ext_size, ulint not_empty)
 Estimated table level stats from sampled value. More...
 
static void btr_cur_add_path_info (btr_cur_t *cursor, ulint height, ulint root_height)
 Adds path information to the cursor for the current page, for which the binary search has been performed. More...
 
btr_latch_leaves_t btr_cur_latch_leaves (buf_block_t *block, const page_id_t &page_id, const page_size_t &page_size, ulint latch_mode, btr_cur_t *cursor, mtr_t *mtr)
 Latches the leaf page or pages requested. More...
 
bool btr_cur_optimistic_latch_leaves (buf_block_t *block, uint64_t modify_clock, ulint *latch_mode, btr_cur_t *cursor, const char *file, ulint line, mtr_t *mtr)
 Optimistically latches the leaf page or pages requested. More...
 
static btr_intention_t btr_cur_get_and_clear_intention (ulint *latch_mode)
 Gets intention in btr_intention_t from latch_mode, and cleares the intention at the latch_mode. More...
 
static rw_lock_type_t btr_cur_latch_for_root_leaf (ulint latch_mode)
 Gets the desired latch type for the root leaf (root page is root leaf) at the latch mode. More...
 
static bool btr_cur_will_modify_tree (dict_index_t *index, const page_t *page, btr_intention_t lock_intention, const rec_t *rec, ulint rec_size, const page_size_t &page_size, mtr_t *mtr)
 Detects whether the modifying record might need a modifying tree structure. More...
 
static bool btr_cur_need_opposite_intention (const page_t *page, btr_intention_t lock_intention, const rec_t *rec)
 Detects whether the modifying record might need a opposite modification to the intention. More...
 
void btr_cur_search_to_nth_level (dict_index_t *index, ulint level, const dtuple_t *tuple, page_cur_mode_t mode, ulint latch_mode, btr_cur_t *cursor, ulint has_search_latch, const char *file, ulint line, mtr_t *mtr)
 Searches an index tree and positions a tree cursor on a given level. More...
 
void btr_cur_search_to_nth_level_with_no_latch (dict_index_t *index, ulint level, const dtuple_t *tuple, page_cur_mode_t mode, btr_cur_t *cursor, const char *file, ulint line, mtr_t *mtr, bool mark_dirty)
 Searches an index tree and positions a tree cursor on a given level. More...
 
void btr_cur_open_at_index_side (bool from_left, dict_index_t *index, ulint latch_mode, btr_cur_t *cursor, ulint level, ut::Location location, mtr_t *mtr)
 Opens a cursor at either end of an index. More...
 
void btr_cur_open_at_index_side_with_no_latch (bool from_left, dict_index_t *index, btr_cur_t *cursor, ulint level, ut::Location location, mtr_t *mtr)
 Opens a cursor at either end of an index. More...
 
bool btr_cur_open_at_rnd_pos (dict_index_t *index, ulint latch_mode, btr_cur_t *cursor, const char *file, ulint line, mtr_t *mtr)
 Positions a cursor at a randomly chosen position within a B-tree. More...
 
static rec_tbtr_cur_insert_if_possible (btr_cur_t *cursor, const dtuple_t *tuple, ulint **offsets, mem_heap_t **heap, mtr_t *mtr)
 Inserts a record if there is enough space, or if enough space can be freed by reorganizing. More...
 
static dberr_t btr_cur_ins_lock_and_undo (ulint flags, btr_cur_t *cursor, dtuple_t *entry, que_thr_t *thr, mtr_t *mtr, bool *inherit)
 For an insert, checks the locks and does the undo logging if desired. More...
 
static void btr_cur_prefetch_siblings (buf_block_t *block)
 Prefetch siblings of the leaf for the pessimistic operation. More...
 
dberr_t btr_cur_optimistic_insert (ulint flags, btr_cur_t *cursor, ulint **offsets, mem_heap_t **heap, dtuple_t *entry, rec_t **rec, big_rec_t **big_rec, que_thr_t *thr, mtr_t *mtr)
 Tries to perform an insert to a page in an index tree, next to cursor. More...
 
dberr_t btr_cur_pessimistic_insert (uint32_t flags, btr_cur_t *cursor, ulint **offsets, mem_heap_t **heap, dtuple_t *entry, rec_t **rec, big_rec_t **big_rec, que_thr_t *thr, mtr_t *mtr)
 Performs an insert on a page of an index tree. More...
 
static dberr_t btr_cur_upd_lock_and_undo (ulint flags, btr_cur_t *cursor, const ulint *offsets, const upd_t *update, ulint cmpl_info, que_thr_t *thr, mtr_t *mtr, roll_ptr_t *roll_ptr)
 For an update, checks the locks and does the undo logging. More...
 
void btr_cur_update_in_place_log (ulint flags, const rec_t *rec, dict_index_t *index, const upd_t *update, trx_id_t trx_id, roll_ptr_t roll_ptr, mtr_t *mtr)
 Writes a redo log record of updating a record in-place. More...
 
bytebtr_cur_parse_update_in_place (byte *ptr, byte *end_ptr, page_t *page, page_zip_des_t *page_zip, dict_index_t *index)
 Parses a redo log record of updating a record in-place. More...
 
bool btr_cur_update_alloc_zip_func (page_zip_des_t *page_zip, page_cur_t *cursor, dict_index_t *index, ulint *offsets, ulint length, bool create, mtr_t *mtr)
 See if there is enough place in the page modification log to log an update-in-place. More...
 
dberr_t btr_cur_update_in_place (ulint flags, btr_cur_t *cursor, ulint *offsets, const upd_t *update, ulint cmpl_info, que_thr_t *thr, trx_id_t trx_id, mtr_t *mtr)
 Updates a record when the update causes no size changes in its fields. More...
 
bool materialize_instant_default (const dict_index_t *index, const rec_t *rec)
 If default value of INSTANT ADD column is to be materialize in updated row. More...
 
dberr_t btr_cur_optimistic_update (ulint flags, btr_cur_t *cursor, ulint **offsets, mem_heap_t **heap, const upd_t *update, ulint cmpl_info, que_thr_t *thr, trx_id_t trx_id, mtr_t *mtr)
 Tries to update a record on a page in an index tree. More...
 
static void btr_cur_pess_upd_restore_supremum (buf_block_t *block, const rec_t *rec, mtr_t *mtr)
 If, in a split, a new supremum record was created as the predecessor of the updated record, the supremum record must inherit exactly the locks on the updated record. More...
 
dberr_t btr_cur_pessimistic_update (ulint flags, btr_cur_t *cursor, ulint **offsets, mem_heap_t **offsets_heap, mem_heap_t *entry_heap, big_rec_t **big_rec, upd_t *update, ulint cmpl_info, que_thr_t *thr, trx_id_t trx_id, undo_no_t undo_no, mtr_t *mtr, btr_pcur_t *pcur)
 Performs an update of a record on a page of a tree. More...
 
static void btr_cur_del_mark_set_clust_rec_log (rec_t *rec, dict_index_t *index, trx_id_t trx_id, roll_ptr_t roll_ptr, mtr_t *mtr)
 Writes the redo log record for delete marking or unmarking of an index record. More...
 
bytebtr_cur_parse_del_mark_set_clust_rec (byte *ptr, byte *end_ptr, page_t *page, page_zip_des_t *page_zip, dict_index_t *index)
 Parses the redo log record for delete marking or unmarking of a clustered index record. More...
 
dberr_t btr_cur_del_mark_set_clust_rec (ulint flags, buf_block_t *block, rec_t *rec, dict_index_t *index, const ulint *offsets, que_thr_t *thr, const dtuple_t *entry, mtr_t *mtr)
 Marks a clustered index record deleted. More...
 
static void btr_cur_del_mark_set_sec_rec_log (rec_t *rec, bool val, mtr_t *mtr)
 Writes the redo log record for a delete mark setting of a secondary index record. More...
 
bytebtr_cur_parse_del_mark_set_sec_rec (byte *ptr, byte *end_ptr, page_t *page, page_zip_des_t *page_zip)
 Parses the redo log record for delete marking or unmarking of a secondary index record. More...
 
dberr_t btr_cur_del_mark_set_sec_rec (ulint flags, btr_cur_t *cursor, bool val, que_thr_t *thr, mtr_t *mtr)
 Sets a secondary index record delete mark to true or false. More...
 
void btr_cur_set_deleted_flag_for_ibuf (rec_t *rec, page_zip_des_t *page_zip, bool val, mtr_t *mtr)
 Sets a secondary index record's delete mark to the given value. More...
 
bool btr_cur_compress_if_useful (btr_cur_t *cursor, bool adjust, mtr_t *mtr)
 Tries to compress a page of the tree if it seems useful. More...
 
bool btr_cur_optimistic_delete_func (btr_cur_t *cursor, ulint flags, mtr_t *mtr)
 
bool btr_cur_pessimistic_delete (dberr_t *err, bool has_reserved_extents, btr_cur_t *cursor, uint32_t flags, bool rollback, trx_id_t trx_id, undo_no_t undo_no, ulint rec_type, mtr_t *mtr, btr_pcur_t *pcur, purge_node_t *node)
 Removes the record on which the tree cursor is positioned. More...
 
static int64_t btr_estimate_n_rows_in_range_on_level (dict_index_t *index, btr_path_t *slot1, btr_path_t *slot2, int64_t n_rows_on_prev_level, bool *is_n_rows_exact)
 Estimate the number of rows between slot1 and slot2 for any level on a B-tree. More...
 
static int64_t btr_estimate_n_rows_in_range_low (dict_index_t *index, const dtuple_t *tuple1, page_cur_mode_t mode1, const dtuple_t *tuple2, page_cur_mode_t mode2, unsigned nth_attempt)
 Estimates the number of rows in a given index range. More...
 
int64_t btr_estimate_n_rows_in_range (dict_index_t *index, const dtuple_t *tuple1, page_cur_mode_t mode1, const dtuple_t *tuple2, page_cur_mode_t mode2)
 Estimates the number of rows in a given index range. More...
 
static void btr_record_not_null_field_in_rec (const dict_index_t *index, ulint n_unique, const ulint *offsets, uint64_t *n_not_null)
 Record the number of non_null key values in a given index for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). More...
 
bool btr_estimate_number_of_different_key_vals (dict_index_t *index)
 Estimates the number of different key values in a given index, for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). More...
 

Variables

constexpr uint32_t BTR_CUR_FINE_HISTORY_LENGTH = 100000
 For the index->lock scalability improvement, only possibility of clear performance regression observed was caused by grown huge history list length. More...
 
ulint btr_cur_n_non_sea = 0
 Number of searches down the B-tree in btr_cur_search_to_nth_level(). More...
 
ulint btr_cur_n_sea = 0
 Number of successful adaptive hash index lookups in btr_cur_search_to_nth_level(). More...
 
ulint btr_cur_n_non_sea_old = 0
 Old value of btr_cur_n_non_sea. More...
 
ulint btr_cur_n_sea_old = 0
 Old value of btr_cur_n_sea. More...
 
uint btr_cur_limit_optimistic_insert_debug = 0
 
static const unsigned rows_in_range_max_retries = 4
 If the tree gets changed too much between the two dives for the left and right boundary then btr_estimate_n_rows_in_range_low() will retry that many times before giving up and returning the value stored in rows_in_range_arbitrary_ret_val. More...
 
static const int64_t rows_in_range_arbitrary_ret_val = 10
 We pretend that a range has that many records if the tree keeps changing for rows_in_range_max_retries retries while we try to estimate the records in a given range. More...
 

Detailed Description

The index tree cursor.

All changes that row operations make to a B-tree or the records there must go through this module! Undo log records are written here of every modify or insert of a clustered index record.

                    NOTE!!!

To make sure we do not run out of disk space during a pessimistic insert or update, we have to reserve 2 x the height of the index tree many pages in the tablespace before we start the operation, because if leaf splitting has been started, it is difficult to undo, except by crashing the database and doing a roll-forward.

Created 10/16/1994 Heikki Tuuri

Macro Definition Documentation

◆ BTR_CUR_PAGE_REORGANIZE_LIMIT

#define BTR_CUR_PAGE_REORGANIZE_LIMIT   (UNIV_PAGE_SIZE / 32)

In the optimistic insert, if the insert does not fit, but this much space can be released by page reorganize, then it is reorganized.

Enumeration Type Documentation

◆ btr_intention_t

Modification types for the B-tree operation.

Note that the order of the enum values is important.

Enumerator
BTR_INTENTION_DELETE 
BTR_INTENTION_BOTH 
BTR_INTENTION_INSERT 

◆ btr_op_t

enum btr_op_t

Buffered B-tree operation types, introduced as part of delete buffering.

Enumerator
BTR_NO_OP 

Not buffered.

BTR_INSERT_OP 

Insert, do not ignore UNIQUE.

BTR_INSERT_IGNORE_UNIQUE_OP 

Insert, ignoring UNIQUE.

BTR_DELETE_OP 

Purge a delete-marked record.

BTR_DELMARK_OP 

Mark a record for deletion.

Function Documentation

◆ btr_cur_add_path_info()

static void btr_cur_add_path_info ( btr_cur_t cursor,
ulint  height,
ulint  root_height 
)
static

Adds path information to the cursor for the current page, for which the binary search has been performed.

Parameters
[in,out]cursorCursor positioned on a page.
[in]heightHeight of the page in the tree; 0 means leaf.
[in]root_heightRoot node height in true.

◆ btr_cur_compress_if_useful()

bool btr_cur_compress_if_useful ( btr_cur_t cursor,
bool  adjust,
mtr_t mtr 
)

Tries to compress a page of the tree if it seems useful.

It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. NOTE: it is assumed that the caller has reserved enough free extents so that the compression will always succeed if done!

Returns
true if compression occurred
Parameters
cursorin/out: cursor on the page to compress; cursor does not stay valid if !adjust and compression occurs
adjustin: true if should adjust the cursor position even if compression occurs
mtrin/out: mini-transaction

◆ btr_cur_del_mark_set_clust_rec()

dberr_t btr_cur_del_mark_set_clust_rec ( ulint  flags,
buf_block_t block,
rec_t rec,
dict_index_t index,
const ulint offsets,
que_thr_t thr,
const dtuple_t entry,
mtr_t mtr 
)

Marks a clustered index record deleted.

Writes an undo log record to undo log on this delete marking. Writes in the trx id field the id of the deleting transaction, and in the roll ptr field pointer to the undo log record created.

Returns
DB_SUCCESS, DB_LOCK_WAIT, or error number
Parameters
flagsin: undo logging and locking flags
blockin/out: buffer block of the record
recin/out: record
indexin: clustered index of the record
offsetsin: rec_get_offsets(rec)
thrin: query thread
entryin: dtuple for the deleting record, also contains the virtual cols if there are any
mtrin/out: mini-transaction

◆ btr_cur_del_mark_set_clust_rec_log()

static void btr_cur_del_mark_set_clust_rec_log ( rec_t rec,
dict_index_t index,
trx_id_t  trx_id,
roll_ptr_t  roll_ptr,
mtr_t mtr 
)
inlinestatic

Writes the redo log record for delete marking or unmarking of an index record.

Parameters
recin: record
indexin: index of the record
trx_idin: transaction id
roll_ptrin: roll ptr to the undo log record
mtrin: mtr

◆ btr_cur_del_mark_set_sec_rec()

dberr_t btr_cur_del_mark_set_sec_rec ( ulint  flags,
btr_cur_t cursor,
bool  val,
que_thr_t thr,
mtr_t mtr 
)

Sets a secondary index record delete mark to true or false.

Returns
DB_SUCCESS, DB_LOCK_WAIT, or error number
Parameters
flagsin: locking flag
cursorin: cursor
valin: value to set
thrin: query thread
mtrin/out: mini-transaction

◆ btr_cur_del_mark_set_sec_rec_log()

static void btr_cur_del_mark_set_sec_rec_log ( rec_t rec,
bool  val,
mtr_t mtr 
)
inlinestatic

Writes the redo log record for a delete mark setting of a secondary index record.

Parameters
recin: record
valin: value to set
mtrin: mtr

◆ btr_cur_get_and_clear_intention()

static btr_intention_t btr_cur_get_and_clear_intention ( ulint latch_mode)
static

Gets intention in btr_intention_t from latch_mode, and cleares the intention at the latch_mode.

Parameters
latch_modein/out: pointer to latch_mode
Returns
intention for latching tree

◆ btr_cur_ins_lock_and_undo()

static dberr_t btr_cur_ins_lock_and_undo ( ulint  flags,
btr_cur_t cursor,
dtuple_t entry,
que_thr_t thr,
mtr_t mtr,
bool *  inherit 
)
inlinestatic

For an insert, checks the locks and does the undo logging if desired.

Returns
DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number
Parameters
flagsin: undo logging and locking flags: if not zero, the parameters index and thr should be specified
cursorin: cursor on page after which to insert
entryin/out: entry to insert
thrin: query thread or NULL
mtrin/out: mini-transaction
inheritout: true if the inserted new record maybe should inherit LOCK_GAP type locks from the successor record

◆ btr_cur_insert_if_possible()

static rec_t * btr_cur_insert_if_possible ( btr_cur_t cursor,
const dtuple_t tuple,
ulint **  offsets,
mem_heap_t **  heap,
mtr_t mtr 
)
static

Inserts a record if there is enough space, or if enough space can be freed by reorganizing.

Differs from btr_cur_optimistic_insert because no heuristics is applied to whether it pays to use CPU time for reorganizing the page or not.

IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit().

Returns
pointer to inserted record if succeed, else NULL
Parameters
cursorin: cursor on page after which to insert; cursor stays valid
tuplein: tuple to insert; the size info need not have been stored to tuple
offsetsout: offsets on *rec
heapin/out: pointer to memory heap, or NULL
mtrin/out: mini-transaction

◆ btr_cur_latch_for_root_leaf()

static rw_lock_type_t btr_cur_latch_for_root_leaf ( ulint  latch_mode)
static

Gets the desired latch type for the root leaf (root page is root leaf) at the latch mode.

Parameters
latch_modein: BTR_SEARCH_LEAF, ...
Returns
latch type

◆ btr_cur_latch_leaves()

btr_latch_leaves_t btr_cur_latch_leaves ( buf_block_t block,
const page_id_t page_id,
const page_size_t page_size,
ulint  latch_mode,
btr_cur_t cursor,
mtr_t mtr 
)

Latches the leaf page or pages requested.

Parameters
[in]blockLeaf page where the search converged
[in]page_idPage id of the leaf
[in]page_sizePage size
[in]latch_modeBTR_SEARCH_LEAF, ...
[in]cursorCursor
[in]mtrMini-transaction
Returns
blocks and savepoints which actually latched.

◆ btr_cur_need_opposite_intention()

static bool btr_cur_need_opposite_intention ( const page_t page,
btr_intention_t  lock_intention,
const rec_t rec 
)
static

Detects whether the modifying record might need a opposite modification to the intention.

Parameters
[in]pagepage
[in]lock_intentionlock intention for the tree operation
[in]recrecord (current node_ptr)
Returns
true if tree modification is needed

◆ btr_cur_open_at_index_side()

void btr_cur_open_at_index_side ( bool  from_left,
dict_index_t index,
ulint  latch_mode,
btr_cur_t cursor,
ulint  level,
ut::Location  location,
mtr_t mtr 
)

Opens a cursor at either end of an index.

Parameters
[in]from_leftTrue if open to the low end, false if to the high end
[in]indexIndex
[in]latch_modeLatch mode
[in,out]cursorCursor
[in]levelLevel to search for (0=leaf)
[in]locationLocation where called
[in,out]mtrMini-transaction

◆ btr_cur_open_at_index_side_with_no_latch()

void btr_cur_open_at_index_side_with_no_latch ( bool  from_left,
dict_index_t index,
btr_cur_t cursor,
ulint  level,
ut::Location  location,
mtr_t mtr 
)

Opens a cursor at either end of an index.

Avoid taking latches on buffer, just pin (by incrementing fix_count) to keep them in buffer pool. This mode is used by intrinsic table as they are not shared and so there is no need of latching.

Parameters
[in]from_lefttrue if open to low end, false if open to high end.
[in]indexIndex
[in,out]cursorCursor
[in]levelLevel to search for (0=leaf)
[in]locationLocation where called
[in,out]mtrMini-transaction

◆ btr_cur_open_at_rnd_pos()

bool btr_cur_open_at_rnd_pos ( dict_index_t index,
ulint  latch_mode,
btr_cur_t cursor,
const char *  file,
ulint  line,
mtr_t mtr 
)

Positions a cursor at a randomly chosen position within a B-tree.

Returns
true if the index is available and we have put the cursor, false if the index is unavailable
Parameters
indexin: index
latch_modein: BTR_SEARCH_LEAF, ...
cursorin/out: B-tree cursor
filein: file name
linein: line where called
mtrin: mtr

◆ btr_cur_optimistic_delete_func()

bool btr_cur_optimistic_delete_func ( btr_cur_t cursor,
ulint  flags,
mtr_t mtr 
)

◆ btr_cur_optimistic_insert()

dberr_t btr_cur_optimistic_insert ( ulint  flags,
btr_cur_t cursor,
ulint **  offsets,
mem_heap_t **  heap,
dtuple_t entry,
rec_t **  rec,
big_rec_t **  big_rec,
que_thr_t thr,
mtr_t mtr 
)

Tries to perform an insert to a page in an index tree, next to cursor.

It is assumed that mtr holds an x-latch on the page. The operation does not succeed if there is too little space on the page. If there is just one record on the page, the insert will always succeed; this is to prevent trying to split a page with just one record.

Returns
DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number
Parameters
flagsin: undo logging and locking flags: if not zero, the parameters index and thr should be specified
cursorin: cursor on page after which to insert; cursor stays valid
offsetsout: offsets on *rec
heapin/out: pointer to memory heap, or NULL
entryin/out: entry to insert
recout: pointer to inserted record if succeed
big_recout: big rec vector whose fields have to be stored externally by the caller, or NULL
thrin: query thread or NULL
mtrin/out: mini-transaction; if this function returns DB_SUCCESS on a leaf page of a secondary index in a compressed tablespace, the caller must mtr_commit(mtr) before latching any further pages

◆ btr_cur_optimistic_latch_leaves()

bool btr_cur_optimistic_latch_leaves ( buf_block_t block,
uint64_t  modify_clock,
ulint latch_mode,
btr_cur_t cursor,
const char *  file,
ulint  line,
mtr_t mtr 
)

Optimistically latches the leaf page or pages requested.

Parameters
[in]blockGuessed buffer block
[in]modify_clockModify clock value
[in,out]latch_modeBTR_SEARCH_LEAF, ...
[in,out]cursorCursor
[in]fileFile name
[in]lineLine where called
[in]mtrMini-transaction
Returns
true if success

◆ btr_cur_optimistic_update()

dberr_t btr_cur_optimistic_update ( ulint  flags,
btr_cur_t cursor,
ulint **  offsets,
mem_heap_t **  heap,
const upd_t update,
ulint  cmpl_info,
que_thr_t thr,
trx_id_t  trx_id,
mtr_t mtr 
)

Tries to update a record on a page in an index tree.

It is assumed that mtr holds an x-latch on the page. The operation does not succeed if there is too little space on the page or if the update would result in too empty a page, so that tree compression is recommended. We assume here that the ordering fields of the record do not change.

Parameters
[in]flagsundo logging and locking flags
[in]cursorcursor on the record to update; cursor stays valid and positioned on the same record
[out]offsetsoffsets on cursor->page_cur.rec
[in,out]heappointer to nullptr or memory heap
[in]updateupdate vector; this must also contain trx id and roll ptr fields
[in]cmpl_infocompiler info on secondary index updates
[in]thrquery thread, or nullptr if flags & (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)
[in]trx_idtransaction id
[in,out]mtrmini-transaction; if this is a secondary index, the caller must mtr_commit(mtr) before latching any further pages
Returns
error code, including
Return values
DB_SUCCESSon success
DB_OVERFLOWif the updated record does not fit
DB_UNDERFLOWif the page would become too empty
DB_ZIP_OVERFLOWif there is not enough space left on the compressed page (IBUF_BITMAP_FREE was reset outside mtr)

◆ btr_cur_parse_del_mark_set_clust_rec()

byte * btr_cur_parse_del_mark_set_clust_rec ( byte ptr,
byte end_ptr,
page_t page,
page_zip_des_t page_zip,
dict_index_t index 
)

Parses the redo log record for delete marking or unmarking of a clustered index record.

Returns
end of log record or NULL
Parameters
ptrin: buffer
end_ptrin: buffer end
pagein/out: page or NULL
page_zipin/out: compressed page, or NULL
indexin: index corresponding to page

◆ btr_cur_parse_del_mark_set_sec_rec()

byte * btr_cur_parse_del_mark_set_sec_rec ( byte ptr,
byte end_ptr,
page_t page,
page_zip_des_t page_zip 
)

Parses the redo log record for delete marking or unmarking of a secondary index record.

Returns
end of log record or NULL
Parameters
ptrin: buffer
end_ptrin: buffer end
pagein/out: page or NULL
page_zipin/out: compressed page, or NULL

◆ btr_cur_parse_update_in_place()

byte * btr_cur_parse_update_in_place ( byte ptr,
byte end_ptr,
page_t page,
page_zip_des_t page_zip,
dict_index_t index 
)

Parses a redo log record of updating a record in-place.

Returns
end of log record or NULL
Parameters
ptrin: buffer
end_ptrin: buffer end
pagein/out: page or NULL
page_zipin/out: compressed page, or NULL
indexin: index corresponding to page

◆ btr_cur_pess_upd_restore_supremum()

static void btr_cur_pess_upd_restore_supremum ( buf_block_t block,
const rec_t rec,
mtr_t mtr 
)
static

If, in a split, a new supremum record was created as the predecessor of the updated record, the supremum record must inherit exactly the locks on the updated record.

In the split it may have inherited locks from the successor of the updated record, which is not correct. This function restores the right locks for the new supremum.

Parameters
blockin: buffer block of rec
recin: updated record
mtrin: mtr

◆ btr_cur_pessimistic_delete()

bool btr_cur_pessimistic_delete ( dberr_t err,
bool  has_reserved_extents,
btr_cur_t cursor,
uint32_t  flags,
bool  rollback,
trx_id_t  trx_id,
undo_no_t  undo_no,
ulint  rec_type,
mtr_t mtr,
btr_pcur_t pcur,
purge_node_t node 
)

Removes the record on which the tree cursor is positioned.

Tries to compress the page if its fillfactor drops below a threshold or if it is the only page on the level. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist.

Parameters
[out]errDB_SUCCESS or DB_OUT_OF_FILE_SPACE; the latter may occur because we may have to update node pointers on upper levels, and in the case of variable length keys these may actually grow in size
[in]has_reserved_extentstrue if the caller has already reserved enough free extents so that he knows that the operation will succeed
[in]cursorCursor on the record to delete; if compression does not occur, the cursor stays valid: it points to successor of deleted record on function exit
[in]flagsBTR_CREATE_FLAG or 0
[in]rollbackTrue if performing rollback, false otherwise.
[in]trx_idThe current transaction id.
[in]undo_noUndo number of the transaction. This is needed for rollback to savepoint of partially updated LOB.
[in]rec_typeUndo record type.
[in]mtrThe mini transaction
[in]pcurPersistent cursor on the record to delete.
[in,out]nodepurge node or nullptr
Returns
true if compression occurred

◆ btr_cur_pessimistic_insert()

dberr_t btr_cur_pessimistic_insert ( uint32_t  flags,
btr_cur_t cursor,
ulint **  offsets,
mem_heap_t **  heap,
dtuple_t entry,
rec_t **  rec,
big_rec_t **  big_rec,
que_thr_t thr,
mtr_t mtr 
)

Performs an insert on a page of an index tree.

It is assumed that mtr holds an x-latch on the tree and on the cursor page. If the insert is made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist.

Returns
DB_SUCCESS or error number
Parameters
flagsin: undo logging and locking flags: if not zero, the parameter thr should be specified; if no undo logging is specified, then the caller must have reserved enough free extents in the file space so that the insertion will certainly succeed
cursorin: cursor after which to insert; cursor stays valid
offsetsout: offsets on *rec
heapin/out: pointer to memory heap that can be emptied, or NULL
entryin/out: entry to insert
recout: pointer to inserted record if succeed
big_recout: big rec vector whose fields have to be stored externally by the caller, or NULL
thrin: query thread or NULL
mtrin/out: mini-transaction

◆ btr_cur_pessimistic_update()

dberr_t btr_cur_pessimistic_update ( ulint  flags,
btr_cur_t cursor,
ulint **  offsets,
mem_heap_t **  offsets_heap,
mem_heap_t entry_heap,
big_rec_t **  big_rec,
upd_t update,
ulint  cmpl_info,
que_thr_t thr,
trx_id_t  trx_id,
undo_no_t  undo_no,
mtr_t mtr,
btr_pcur_t pcur = nullptr 
)

Performs an update of a record on a page of a tree.

It is assumed that mtr holds an x-latch on the tree and on the cursor page. If the update is made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist.

Parameters
[in]flagsUndo logging, locking, and rollback flags
[in,out]cursorcursor on the record to update; cursor may become invalid if *big_rec == NULL || !(flags & BTR_KEEP_POS_FLAG)
[out]offsetsOffsets on cursor->page_cur.rec
[in,out]offsets_heapPointer to memory heap that can be emptied, or NULL
[in,out]entry_heapMemory heap for allocating big_rec and the index tuple.
[out]big_recBig rec vector whose fields have to be stored externally by the caller, or NULL
[in,out]updateUpdate vector; this is allowed to also contain trx id and roll ptr fields. Non-updated columns that are moved offpage will be appended to this.
[in]cmpl_infoCompiler info on secondary index updates
[in]thrQuery thread, or NULL if flags & (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)
[in]trx_idTransaction id
[in]undo_noUndo number of the transaction. This is needed for rollback to savepoint of partially updated LOB.
[in,out]mtrMini-transaction; must be committed before latching any further pages
[in]pcurThe persistent cursor on the record to update.
Returns
DB_SUCCESS or error code

◆ btr_cur_prefetch_siblings()

static void btr_cur_prefetch_siblings ( buf_block_t block)
static

Prefetch siblings of the leaf for the pessimistic operation.

Parameters
blockleaf page

◆ btr_cur_search_to_nth_level()

void btr_cur_search_to_nth_level ( dict_index_t index,
ulint  level,
const dtuple_t tuple,
page_cur_mode_t  mode,
ulint  latch_mode,
btr_cur_t cursor,
ulint  has_search_latch,
const char *  file,
ulint  line,
mtr_t mtr 
)

Searches an index tree and positions a tree cursor on a given level.

NOTE: n_fields_cmp in tuple must be set so that it cannot be compared to node pointer page number fields on the upper levels of the tree! Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. If mode is PAGE_CUR_GE, then up_match will a have a sensible value.

If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the search tuple should be performed in the B-tree. InnoDB does an insert immediately after the cursor. Thus, the cursor may end up on a user record, or on a page infimum record.

Parameters
indexin: index
levelin: the tree level of search
tuplein: data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field!
modein: PAGE_CUR_L, ...; Inserts should always be made using PAGE_CUR_LE to search the position!
latch_modein: BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, BTR_DELETE_MARK, BTR_DELETE, or BTR_ESTIMATE; cursor->left_block is used to store a pointer to the left neighbor page, in the cases BTR_SEARCH_PREV and BTR_MODIFY_PREV; NOTE that if has_search_latch is != 0, we maybe do not have a latch set on the cursor page, we assume the caller uses his search latch to protect the record!
cursorin/out: tree cursor; the cursor page is s- or x-latched, but see also above!
has_search_latchin: info on the latch mode the caller currently has on search system: RW_S_LATCH, or 0
filein: file name
linein: line where called
mtrin: mtr

◆ btr_cur_search_to_nth_level_with_no_latch()

void btr_cur_search_to_nth_level_with_no_latch ( dict_index_t index,
ulint  level,
const dtuple_t tuple,
page_cur_mode_t  mode,
btr_cur_t cursor,
const char *  file,
ulint  line,
mtr_t mtr,
bool  mark_dirty 
)

Searches an index tree and positions a tree cursor on a given level.

This function will avoid placing latches while traversing the path and so should be used only for cases where-in latching is not needed.

Parameters
[in]indexIndex
[in]levelThe tree level of search
[in]tupleData tuple; Note: n_fields_cmp in compared to the node ptr page node field
[in]modePAGE_CUR_L, .... Insert should always be made using PAGE_CUR_LE to search the position.
[in,out]cursorTree cursor; points to record of interest.
[in]fileFile name
[in]lineLine where called from
[in,out]mtrMini-transaction
[in]mark_dirtyif true then mark the block as dirty

◆ btr_cur_set_deleted_flag_for_ibuf()

void btr_cur_set_deleted_flag_for_ibuf ( rec_t rec,
page_zip_des_t page_zip,
bool  val,
mtr_t mtr 
)

Sets a secondary index record's delete mark to the given value.

This function is only used by the insert buffer merge mechanism.

Parameters
recin/out: record
page_zipin/out: compressed page corresponding to rec, or NULL when the tablespace is uncompressed
valin: value to set
mtrin/out: mini-transaction

◆ btr_cur_upd_lock_and_undo()

static dberr_t btr_cur_upd_lock_and_undo ( ulint  flags,
btr_cur_t cursor,
const ulint offsets,
const upd_t update,
ulint  cmpl_info,
que_thr_t thr,
mtr_t mtr,
roll_ptr_t roll_ptr 
)
inlinestatic

For an update, checks the locks and does the undo logging.

Returns
DB_SUCCESS, DB_WAIT_LOCK, or error number
Parameters
flagsin: undo logging and locking flags
cursorin: cursor on record to update
offsetsin: rec_get_offsets() on cursor
updatein: update vector
cmpl_infoin: compiler info on secondary index updates
thrin: query thread (can be NULL if BTR_NO_LOCKING_FLAG)
mtrin/out: mini-transaction
roll_ptrout: roll pointer

◆ btr_cur_update_alloc_zip_func()

bool btr_cur_update_alloc_zip_func ( page_zip_des_t page_zip,
page_cur_t cursor,
dict_index_t index,
ulint offsets,
ulint  length,
bool  create,
mtr_t mtr 
)

See if there is enough place in the page modification log to log an update-in-place.

Parameters
[in,out]page_zipCompressed page.
[in,out]cursorB-tree page cursor.
[in]indexThe index corresponding to cursor.
[in,out]offsetsOffsets of the cursor record.
[in]lengthsize needed
[in]createtrue=delete-and-insert, false=update-in-place
[in,out]mtrMini-transaction.
Return values
falseif out of space; IBUF_BITMAP_FREE will be reset outside mtr if the page was re-compressed
trueif enough place;

IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is a secondary index leaf page. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(mtr).

◆ btr_cur_update_in_place()

dberr_t btr_cur_update_in_place ( ulint  flags,
btr_cur_t cursor,
ulint offsets,
const upd_t update,
ulint  cmpl_info,
que_thr_t thr,
trx_id_t  trx_id,
mtr_t mtr 
)

Updates a record when the update causes no size changes in its fields.

Parameters
[in]flagsUndo logging and locking flags
[in]cursorCursor on the record to update; cursor stays valid and positioned on the same record
[in,out]offsetsOffsets on cursor->page_cur.rec
[in]updateUpdate vector
[in]cmpl_infoCompiler info on secondary index updates
[in]thrQuery thread, or null if flags & (btr_no_locking_flag | btr_no_undo_log_flag | btr_create_flag | btr_keep_sys_flag)
[in]trx_idTransaction id
[in,out]mtrMini-transaction; if this is a secondary index, the caller must mtr_commit(mtr) before latching any further pages
Returns
locking or undo log related error code, or
Return values
DB_SUCCESSon success
DB_ZIP_OVERFLOWif there is not enough space left on the compressed page (IBUF_BITMAP_FREE was reset outside mtr)

◆ btr_cur_update_in_place_log()

void btr_cur_update_in_place_log ( ulint  flags,
const rec_t rec,
dict_index_t index,
const upd_t update,
trx_id_t  trx_id,
roll_ptr_t  roll_ptr,
mtr_t mtr 
)

Writes a redo log record of updating a record in-place.

Parameters
[in]flagsUndo logging and locking flags
[in]recRecord
[in]indexIndex of the record
[in]updateUpdate vector
[in]trx_idTransaction id
[in]roll_ptrRoll ptr
[in]mtrMini-transaction

◆ btr_cur_will_modify_tree()

static bool btr_cur_will_modify_tree ( dict_index_t index,
const page_t page,
btr_intention_t  lock_intention,
const rec_t rec,
ulint  rec_size,
const page_size_t page_size,
mtr_t mtr 
)
static

Detects whether the modifying record might need a modifying tree structure.

Parameters
[in]indexindex
[in]pagepage
[in]lock_intentionlock intention for the tree operation
[in]recrecord (current node_ptr)
[in]rec_sizesize of the record or max size of node_ptr
[in]page_sizepage size
[in]mtrmtr
Returns
true if tree modification is needed

◆ btr_estimate_n_rows_in_range()

int64_t btr_estimate_n_rows_in_range ( dict_index_t index,
const dtuple_t tuple1,
page_cur_mode_t  mode1,
const dtuple_t tuple2,
page_cur_mode_t  mode2 
)

Estimates the number of rows in a given index range.

Parameters
[in]indexindex
[in]tuple1range start, may also be empty tuple
[in]mode1search mode for range start
[in]tuple2range end, may also be empty tuple
[in]mode2search mode for range end
Returns
estimated number of rows

◆ btr_estimate_n_rows_in_range_low()

static int64_t btr_estimate_n_rows_in_range_low ( dict_index_t index,
const dtuple_t tuple1,
page_cur_mode_t  mode1,
const dtuple_t tuple2,
page_cur_mode_t  mode2,
unsigned  nth_attempt 
)
static

Estimates the number of rows in a given index range.

Parameters
[in]indexindex
[in]tuple1range start, may also be empty tuple
[in]mode1search mode for range start
[in]tuple2range end, may also be empty tuple
[in]mode2search mode for range end
[in]nth_attemptif the tree gets modified too much while we are trying to analyze it, then we will retry (this function will call itself, incrementing this parameter)
Returns
estimated number of rows; if after rows_in_range_max_retries retries the tree keeps changing, then we will just return rows_in_range_arbitrary_ret_val as a result (if nth_attempt >= rows_in_range_max_retries and the tree is modified between the two dives).

◆ btr_estimate_n_rows_in_range_on_level()

static int64_t btr_estimate_n_rows_in_range_on_level ( dict_index_t index,
btr_path_t slot1,
btr_path_t slot2,
int64_t  n_rows_on_prev_level,
bool *  is_n_rows_exact 
)
static

Estimate the number of rows between slot1 and slot2 for any level on a B-tree.

This function starts from slot1->page and reads a few pages to the right, counting their records. If we reach slot2->page quickly then we know exactly how many records there are between slot1 and slot2 and we set is_n_rows_exact to true. If we cannot reach slot2->page quickly then we calculate the average number of records in the pages scanned so far and assume that all pages that we did not scan up to slot2->page contain the same number of records, then we multiply that average to the number of pages between slot1->page and slot2->page (which is n_rows_on_prev_level). In this case we set is_n_rows_exact to false.

Returns
number of rows, not including the borders (exact or estimated)
Parameters
indexin: index
slot1in: left border
slot2in: right border
n_rows_on_prev_levelin: number of rows on the previous level for the same descend paths; used to determine the number of pages on this level
is_n_rows_exactout: true if the returned value is exact i.e. not an estimation

◆ btr_estimate_number_of_different_key_vals()

bool btr_estimate_number_of_different_key_vals ( dict_index_t index)

Estimates the number of different key values in a given index, for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).

The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed 0..n_uniq-1) and the number of pages that were sampled is saved in index->stat_n_sample_sizes[]. If innodb_stats_method is nulls_ignored, we also record the number of non-null values for each prefix and stored the estimates in array index->stat_n_non_null_key_vals.

Returns
true if the index is available and we get the estimated numbers, false if the index is unavailable.
Parameters
indexin: index

◆ btr_record_not_null_field_in_rec()

static void btr_record_not_null_field_in_rec ( const dict_index_t index,
ulint  n_unique,
const ulint offsets,
uint64_t *  n_not_null 
)
static

Record the number of non_null key values in a given index for each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).

The estimates are eventually stored in the array: index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1.

Parameters
[in]indexindex
[in]n_uniquedict_index_get_n_unique(index), number of columns uniquely determine an index entry
[in]offsetsrec_get_offsets(rec, index), its size could be for all fields or that of "n_unique"
[in,out]n_not_nullarray to record number of not null rows for n-column prefix

◆ BTR_TABLE_STATS_FROM_SAMPLE()

constexpr uint64_t BTR_TABLE_STATS_FROM_SAMPLE ( uint64_t  value,
dict_index_t index,
uint64_t  sample,
ulint  ext_size,
ulint  not_empty 
)
constexpr

Estimated table level stats from sampled value.

Parameters
valuesampled stats
indexindex being sampled
samplenumber of sampled rows
ext_sizeexternal stored data size
not_emptytable not empty
Returns
estimated table wide stats from sampled value

◆ materialize_instant_default()

bool materialize_instant_default ( const dict_index_t index,
const rec_t rec 
)

If default value of INSTANT ADD column is to be materialize in updated row.

Parameters
[in]indexrecord descriptor
[in]recrecord
Returns
true if instant add column(s) to be materialized.

Variable Documentation

◆ BTR_CUR_FINE_HISTORY_LENGTH

constexpr uint32_t BTR_CUR_FINE_HISTORY_LENGTH = 100000
constexpr

For the index->lock scalability improvement, only possibility of clear performance regression observed was caused by grown huge history list length.

That is because the exclusive use of index->lock also worked as reserving free blocks and read IO bandwidth with priority. To avoid huge glowing history list as same level with previous implementation, prioritizes pessimistic tree operations by purge as the previous, when it seems to be growing huge.

Experimentally, the history list length starts to affect to performance throughput clearly from about 100000.

◆ btr_cur_limit_optimistic_insert_debug

uint btr_cur_limit_optimistic_insert_debug = 0

◆ btr_cur_n_non_sea

ulint btr_cur_n_non_sea = 0

Number of searches down the B-tree in btr_cur_search_to_nth_level().

◆ btr_cur_n_non_sea_old

ulint btr_cur_n_non_sea_old = 0

Old value of btr_cur_n_non_sea.

Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor().

◆ btr_cur_n_sea

ulint btr_cur_n_sea = 0

Number of successful adaptive hash index lookups in btr_cur_search_to_nth_level().

◆ btr_cur_n_sea_old

ulint btr_cur_n_sea_old = 0

Old value of btr_cur_n_sea.

Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor().

◆ rows_in_range_arbitrary_ret_val

const int64_t rows_in_range_arbitrary_ret_val = 10
static

We pretend that a range has that many records if the tree keeps changing for rows_in_range_max_retries retries while we try to estimate the records in a given range.

◆ rows_in_range_max_retries

const unsigned rows_in_range_max_retries = 4
static

If the tree gets changed too much between the two dives for the left and right boundary then btr_estimate_n_rows_in_range_low() will retry that many times before giving up and returning the value stored in rows_in_range_arbitrary_ret_val.