MySQL 9.1.0
Source Code Documentation
|
Row versions. More...
#include <stddef.h>
#include "btr0btr.h"
#include "current_thd.h"
#include "dict0boot.h"
#include "dict0dict.h"
#include "ha_prototypes.h"
#include "lock0lock.h"
#include "mach0data.h"
#include "que0que.h"
#include "read0read.h"
#include "rem0cmp.h"
#include "row0ext.h"
#include "row0mysql.h"
#include "row0row.h"
#include "row0upd.h"
#include "row0vers.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "trx0roll.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0undo.h"
#include "my_dbug.h"
Functions | |
static bool | row_vers_non_vc_index_entry_match (dict_index_t *index, const dtuple_t *ientry1, const dtuple_t *ientry2, ulint *n_non_v_col) |
Check whether all non-virtual columns in the two index entries match. More... | |
static bool | row_clust_vers_matches_sec (const dict_index_t *const clust_index, const rec_t *const clust_rec, const dtuple_t *const clust_vrow, const ulint *const clust_offsets, const dict_index_t *const sec_index, const rec_t *const sec_rec, const ulint *const sec_offsets, const bool comp, const bool looking_for_match, mem_heap_t *const heap) |
Checks if a particular version of a record from clustered index matches the secondary index record. More... | |
static bool | row_vers_find_matching (bool looking_for_match, const dict_index_t *const clust_index, const rec_t *const clust_rec, ulint *&clust_offsets, const dict_index_t *const sec_index, const rec_t *const sec_rec, const ulint *const sec_offsets, const bool comp, const trx_id_t trx_id, mtr_t *const mtr, mem_heap_t *&heap) |
Loops through the history of clustered index record in the undo log, stopping after the first version which was not created by the given active transaction, and reports if it found a version which satisfies criterion specified by looking_for_match. More... | |
static trx_t * | row_vers_impl_x_locked_low (const rec_t *const clust_rec, const dict_index_t *const clust_index, const rec_t *const sec_rec, const dict_index_t *const sec_index, const ulint *const sec_offsets, mtr_t *const mtr) |
Finds out if an active transaction has inserted or modified a secondary index record. More... | |
trx_t * | row_vers_impl_x_locked (const rec_t *rec, const dict_index_t *index, const ulint *offsets) |
Finds out if an active transaction has inserted or modified a secondary index record. More... | |
bool | row_vers_must_preserve_del_marked (trx_id_t trx_id, const table_name_t &name, mtr_t *mtr) |
Finds out if we must preserve a delete marked earlier version of a clustered index record, because it is >= the purge view. More... | |
static void | row_vers_build_clust_v_col (dtuple_t *row, dict_index_t *clust_index, dict_index_t *index, mem_heap_t *heap) |
build virtual column value from current cluster index record data More... | |
static void | row_vers_build_cur_vrow_low (bool in_purge, const rec_t *rec, dict_index_t *clust_index, ulint *clust_offsets, dict_index_t *index, roll_ptr_t roll_ptr, trx_id_t trx_id, mem_heap_t *v_heap, const dtuple_t **vrow, mtr_t *mtr) |
Build latest virtual column data from undo log. More... | |
static bool | row_vers_vc_matches_cluster (bool in_purge, const rec_t *rec, const dtuple_t *icentry, dict_index_t *clust_index, ulint *clust_offsets, dict_index_t *index, const dtuple_t *ientry, roll_ptr_t roll_ptr, trx_id_t trx_id, mem_heap_t *v_heap, const dtuple_t **vrow, mtr_t *mtr) |
Check whether a virtual column value in the secondary virtual index matches that of the current cluster index record, which is recreated from information stored in the undo log. More... | |
static const dtuple_t * | row_vers_build_cur_vrow (bool in_purge, const rec_t *rec, dict_index_t *clust_index, ulint **clust_offsets, dict_index_t *index, roll_ptr_t roll_ptr, trx_id_t trx_id, mem_heap_t *heap, mem_heap_t *v_heap, mtr_t *mtr) |
Build a dtuple that contains virtual column data for the current cluster index. More... | |
bool | row_vers_old_has_index_entry (bool also_curr, const rec_t *rec, mtr_t *mtr, dict_index_t *index, const dtuple_t *ientry, roll_ptr_t roll_ptr, trx_id_t trx_id) |
Finds out if a version of the record, where the version >= the current purge view, should have ientry as its secondary index entry. More... | |
dberr_t | row_vers_build_for_consistent_read (const rec_t *rec, mtr_t *mtr, dict_index_t *index, ulint **offsets, ReadView *view, mem_heap_t **offset_heap, mem_heap_t *in_heap, rec_t **old_vers, const dtuple_t **vrow, lob::undo_vers_t *lob_undo) |
Constructs the version of a clustered index record which a consistent read should see. More... | |
void | row_vers_build_for_semi_consistent_read (const rec_t *rec, mtr_t *mtr, dict_index_t *index, ulint **offsets, mem_heap_t **offset_heap, mem_heap_t *in_heap, const rec_t **old_vers, const dtuple_t **vrow) |
Constructs the last committed version of a clustered index record, which should be seen by a semi-consistent read. More... | |
Row versions.
Created 2/6/1997 Heikki Tuuri
|
static |
Checks if a particular version of a record from clustered index matches the secondary index record.
The match occurs if and only if two conditions hold: 1) the clust_rec exists and is not delete marked; 2) the values in columns in clust_rec match those in sec_rec. Please note that the delete marker on sec_rec does not play any role in this definition!
[in] | clust_index | the clustered index |
[in] | clust_rec | the clustered index record, can be null or delete marked |
[in] | clust_vrow | the values of virtual columns, can be NULL if the clust_rec was stored in undo log by operation that did not change any secondary index column (and was not a DELETE operation) |
[in] | clust_offsets | the offsets for clust_rec, rec_get_offsets(clust_rec, clust_index) |
[in] | sec_index | the secondary index |
[in] | sec_rec | the secondary index record |
[in] | sec_offsets | the offsets for secondary index record, rec_get_offsets(sec_rec, sec_index) |
[in] | comp | the compression flag for both the clustered and the secondary index, as both are assumed equal |
[in] | looking_for_match | are we looking for match? false means that we are looking for non-match |
[in] | heap | the heap to be used for all allocations |
If we could not find a clust_rec version, it means it either never existed or was garbage collected; in either case we can interpret it as the row not being present at that point in time. Similarly, if it is delete marked. In all these cases, we report that there is no match.
If the index involves virtual columns, then we can rely on the assumption that trx_undo_prev_version_build
will try to retrieve clust_vrow, and the only reason it can not retrieve it is because there was no change to any of the indexed columns. In particular this should mean, that the answer to the question "does this clust_rec match sec_row?" did not change, and we can return the same value as before, which was !looking_for_match. We know it was not looking_for_match because in such case the loop would stop.
There are some difficulties we should take into consideration here:
clust_rec
, which we know has all the important columns equal to the most recent one. Moreover, we know that clust_rec
is not delete marked. We also know, that the most recent version is also not delete marked, because, if it was delete marked, then the most recent change would be a DELETE operation, and in such cases we always undo log the values of columns, yet clust_vrow
is null. So, the most recent version, and the version just before it, not only have the same values of indexed columns, but also the same delete mark. If so, then it is impossible that this particular change created or removed a secondary index entry. Therefore we need to continue the loop, and to do so we have to return the opposite of what the loop is searching for, thus !looking_for_match. Here's a slightly different argument, perhaps more persuasive in case we want to prove that the returned value correctly answers the question "does clust_rec match the sec_rec?". Consider two cases, depending on sec_rec delete mark:
A) sec_rec is delete marked In this case, looking_for_match is set to true, thus we are about to return false. So, our claim is that clust_rec does not match sec_rec. For consider for a moment the opposite, that clust_rec does match sec_rec - it would follow, that also the most recent version matches sec_rec, as it has the same values of columns, and delete mark. But then, we have that two most recent versions of the clustered index record are not delete marked and match the secondary index record, yet for some reason the change was not synchronized to the secondary index, which is still delete marked! This contradicts the assumption that at most one most recent change is not synchronized to the secondary index.
B) sec_rec is not delete marked In this case, looking_for_match is set to false, thus we are about to return true. So, our claim is that clust_rec does match sec_rec. For consider for a moment the opposite, that clust_rec doesn't match sec_rec - it would follow, that also the most recent version doesn't match sec_rec, as it has the same values of columns, and delete mark. But then, we have that two most recent versions of the clustered index record do not match the secondary index record, yet for some reason the change was not synchronized to the secondary index, which is still not delete marked! This contradicts the assumption that at most one most recent change is not synchronized to the secondary index.
Again, before getting here we've already established that clust_rec
is not delete marked, and if clust_vrow
is missing, then it must mean that the later version is also not delete marked, as otherwise we would have to log all columns to the undo log.
Reconstruct all the columns
If the reconstructed values do not match the secondary index then we know we should report no match. We compare the strings in binary mode to make it more robust, because a thread which has changed "a" to "A" should prevent concurrent transactions from peeking into the new binary representation, say via CONVERT(column_name, binary).
|
static |
build virtual column value from current cluster index record data
[in,out] | row | the cluster index row in dtuple form |
[in] | clust_index | clustered index |
[in] | index | the secondary index |
[in] | heap | heap used to build virtual dtuple |
|
static |
Build a dtuple that contains virtual column data for the current cluster index.
[in] | in_purge | called by purge thread |
[in] | rec | cluster index rec |
[in] | clust_index | cluster index |
[in] | clust_offsets | cluster rec offset |
[in] | index | secondary index |
[in] | roll_ptr | roll_ptr for the purge record |
[in] | trx_id | transaction ID on the purging record |
[in,out] | heap | heap memory |
[in,out] | v_heap | heap memory to keep virtual column dtuple |
[in] | mtr | mtr holding the latch on rec |
|
static |
Build latest virtual column data from undo log.
[in] | in_purge | whether this is the purge thread |
[in] | rec | clustered index record |
[in] | clust_index | clustered index |
[in,out] | clust_offsets | offsets on the clustered index record |
[in] | index | the secondary index |
[in] | roll_ptr | the rollback pointer for the purging record |
[in] | trx_id | trx id for the purging record |
[in,out] | v_heap | heap used to build vrow |
[out] | vrow | dtuple holding the virtual rows |
[in,out] | mtr | mtr holding the latch on rec |
dberr_t row_vers_build_for_consistent_read | ( | const rec_t * | rec, |
mtr_t * | mtr, | ||
dict_index_t * | index, | ||
ulint ** | offsets, | ||
ReadView * | view, | ||
mem_heap_t ** | offset_heap, | ||
mem_heap_t * | in_heap, | ||
rec_t ** | old_vers, | ||
const dtuple_t ** | vrow, | ||
lob::undo_vers_t * | lob_undo | ||
) |
Constructs the version of a clustered index record which a consistent read should see.
We assume that the trx id stored in rec is such that the consistent read should not see rec in its present version.
[in] | rec | record in a clustered index; the caller must have a latch on the page; this latch locks the top of the stack of versions of this record |
[in] | mtr | mtr holding the latch on rec; it will also hold the latch on purge_view |
[in] | index | the clustered index |
[in] | offsets | offsets returned by rec_get_offsets(rec, index) |
[in] | view | the consistent read view |
[in,out] | offset_heap | memory heap from which the offsets are allocated |
[in] | in_heap | memory heap from which the memory for *old_vers is allocated; memory for possible intermediate versions is allocated and freed locally within the function |
[out] | old_vers | old version, or NULL if the history is missing or the record does not exist in the view, that is, it was freshly inserted afterwards. |
[out] | vrow | reports virtual column info if any |
[in] | lob_undo | undo log to be applied to blobs. |
void row_vers_build_for_semi_consistent_read | ( | const rec_t * | rec, |
mtr_t * | mtr, | ||
dict_index_t * | index, | ||
ulint ** | offsets, | ||
mem_heap_t ** | offset_heap, | ||
mem_heap_t * | in_heap, | ||
const rec_t ** | old_vers, | ||
const dtuple_t ** | vrow | ||
) |
Constructs the last committed version of a clustered index record, which should be seen by a semi-consistent read.
[in] | rec | Record in a clustered index; the caller must have a latch on the page; this latch locks the top of the stack of versions of this record |
[in] | mtr | Mini-transaction holding the latch on rec |
[in] | index | The clustered index |
[in,out] | offsets | Offsets returned by rec_get_offsets(rec, index) |
[in,out] | offset_heap | Memory heap from which the offsets are allocated |
[in] | in_heap | Memory heap from which the memory for *old_vers is allocated; memory for possible intermediate versions is allocated and freed locally within the function |
[out] | old_vers | Rec, old version, or null if the record does not exist in the view, that is, it was freshly inserted afterwards |
[out] | vrow | Virtual row, old version, or null if it is not updated in the view |
|
static |
Loops through the history of clustered index record in the undo log, stopping after the first version which was not created by the given active transaction, and reports if it found a version which satisfies criterion specified by looking_for_match.
If looking_for_match is true, it searches for a version which matches the secondary index record. Otherwise it searches for a version which does not match.
[in] | looking_for_match | are we looking for match? false means that we are looking for non-match |
[in] | clust_index | the clustered index |
[in] | clust_rec | the clustered index record, can be null or delete marked |
[in] | clust_offsets | the offsets for clust_rec, rec_get_offsets(clust_rec, clust_index) |
[in] | sec_index | the secondary index |
[in] | sec_rec | the secondary index record |
[in] | sec_offsets | the offsets for secondary index record, rec_get_offsets(sec_rec, sec_index) |
[in] | comp | the compression flag for both the clustered and the secondary index, as both are assumed equal |
[in] | trx_id | the active transaction which created the most recent version of clustered index record |
[in] | mtr | the mtr inside which we are operating |
[in,out] | heap | the heap to be used for all allocations. This heap might get deallocated, and a newly allocated one will be returned, along with its ownership |
trx_t * row_vers_impl_x_locked | ( | const rec_t * | rec, |
const dict_index_t * | index, | ||
const ulint * | offsets | ||
) |
Finds out if an active transaction has inserted or modified a secondary index record.
[in] | rec | record in a secondary index |
[in] | index | the secondary index |
[in] | offsets | rec_get_offsets(rec, index) |
|
inline static |
Finds out if an active transaction has inserted or modified a secondary index record.
[in] | clust_rec | Clustered index record |
[in] | clust_index | The clustered index |
[in] | sec_rec | Secondary index record |
[in] | sec_index | The secondary index |
[in] | sec_offsets | rec_get_offsets(sec_rec, sec_index) |
[in,out] | mtr | Mini-transaction |
Here's my best understanding of what this code is doing.
When we call this function we already have sec_rec
- a row from secondary index sec_index
, which includes:
comp
)rec_del
: 32 or 0) We assume that this sec_rec
really is a record in the secondary index, as opposed to some artificially "made up" sequence of bytes. Moreover we assume that this secondary index row is currently latched (not to be confused with "locked"), so that sec_rec
is the most current state of this row.
Also, we assume, that rows in secondary index are either added, or removed, (or delete marked, or delete un-marked) but never modified. Moreover, we assume, that each of these secondary index operations is done after the primary (clustered) index was modified, to reflect the new state of affairs.
We assume that clust_rec
is the current version of the clustered index record to which the secondary record sec_rec
points.
To be more precise:
Let S[f] mean value of field f in the secondary index record S. Let C[t][f] mean value of field f in version t of clustered record C, where we use consecutive natural numbers to denote versions: t=0,1,...,current_version.
Note: secondary index is not versioned
Let S.deleted and C[t].deleted be delete markers of these records.
Definition 1. We say that secondary index row S points-to
a clustered index row C if and only if: S[pkey] = C[t][pkey] for each primary key column pkey (for any version t)
Note: it does not matter which version t we pick, as for our purposes primary key fields may be thought as immutable (say, we emulate their modification by combination of delete + insert).
Definition 2. We say that secondary index row S matches
a clustered index row C in version t if and only if: (S[f] = C[t][f] for each column f) and not (C[t].deleted)
Note: In the above definition f might be a virtual column. Note: There might be multiple versions which a single S matches
, for example when a transaction modifies a row back and forth, or changes columns which are not indexed by secondary index. Note: The definition of matches
does not depend on S.deleted
Definition 3. We say that secondary index row S corresponds-to
a clustered index row C in version t if and only if: (not(S.deleted) and (S matches
C[t])) or (S.deleted and not (S matches
C[t]))
In other words, S corresponds-to
C[t] means that the state of secondary index row S is synchronized with the state of the row in clustered index in version t.
Assumption 1. (S corresponds-to
C[current_version]) or (S corresponds-to
C[current_version-1]) In other words, sec_rec
corresponds-to
either the most current_version of the primary record it points-to
(i.e. the changes in the clustered index were synchronized to the secondary index), or the current_version-1 (i.e. the changes in the clustered index were not synchronized to the secondary index yet). This belief is supported by reading the source code and by the observation that to modify a secondary index, one has to modify the clustered index first, and modifying the clustered index and later the secondary index requires holding an (implicit or explicit) lock on the clustered index record, so there is at most one transaction operating on any given clustered index row, and thus at most one change still "unsynchronized" to the secondary index.
An equivalent formulation of Assumption 1 in terms of matches
is: (not(S.deleted) => ((S matches
C[current_version]) or (S matches
C[current_version-1])) ) and ( S.deleted => not((S matches
C[current_version]) and (S matches
C[current_version-1])) ) So, a non-deleted S implies that one of the two most recent versions matches
it, and a deleted S means that at least one of the two most recent versions does not match
it.
Definition 4. We say that S could-be-authored-by
a clustered index row C in version t if and only if: (S corresponds-to
C[t]) and !(S corresponds-to
C[t-1])
This can be equivalently expressed using matches
relation as: (not(S.deleted) and (S matches
C[t]) and not(S matches
C[t-1])) or (S.deleted and not(S matches
C[t]) and (S matches
C[t-1]))
Definition 5. We say that secondary index row S was-authored-by
a clustered index row C in version t if and only if: (S could-be-authored-by
C[t]) and (for each v > t. not(S could-be-authored-by
C[v])) So, t is the latest version in which S could-be-authored-by
C[t].
Equivalently, one can define was-authored-by
in terms of matches
, by identifying the most recent version t for which matches
relation between S and C[t] has changed in the right direction, that is, in case S.deleted we search for the first change from (S matches
C[t-1]) to not(S matches
C[t]), while in case of not(S.deleted) we search for the first change from not(S matches
C[t-1]) to (S matches
C[t]).
We are now ready to explain precisely what the call to row_vers_impl_x_locked_low(C=clust_rec
,...,S=sec_rec
,...) tries to achieve.
If there is t, such that S was-authored-by
C[t], and C[t].trx_id is active then the return value is C[t].trx_id. Otherwise the return value is 0.
The implementation is tricky, as it tries hard to avoid ever looking at the C[current_version], instead looking only at older versions. (One reason for this effort, IMHO, is that virtual columns might be expensive to materialize, and are not stored in clustered index at all. Another reason, I guess, might be to have only one way of reading data - from undo log). Take a moment to realize that it is wonderful that this is even possible, as this is not apparent from the Def 5! After all it might well be the case that the t
we are looking for is equal to current_version
in which case the definition of was-authored-by
used naively would require us to check if S matches
C[current_version], which in turn done naively forces us to look at fields of C[current_version]!
So, how can we do that without ever looking at C[current_version] fields?
We start by reading C[current_version].trx_id, and this is the only piece of information we read from current_version. We store that in trx_id
variable.
We check if trx_id
is active.
If trx_id
is not active, then we know that we can return 0. Why? Because it is impossible for any other C[t].trx_id to be still active, if the most recent trx to modify the record is already inactive.
From now on we assume that trx_id
is active.
We observe that the definition of S was-authored-by
C[t] requires not(S corresponds-to
C[t-1]). So, one thing we can use to filter interesting versions, is to proceed through most recent versions t=current_version, current_version-1, ... until we find the first t, such that not(S corresponds-to
C[t-1]). Surprisingly this is the only condition we have to check! Why? Observe, that it must also be the case that (S corresponds-to
C[t]), because we either have tested that explicitly in the previous step of the loop or in case of first iteration, it follows from Assumption 1. This means, that (S could-be-authored-by
C[t]), and since the t is maximal, we have (S was-authored-by
C[t]).
Therefore our algorithm is to simply loop over versions t, as long as C[t].trx_id = trx_id, and stop as soon as not(S corresponds-to
C[t-1]) in which case the answer is yes, or if we can't find such a version, the answer is no.
The reality is however much more complicated, as it needs to deal with: A) incomplete history of versions (we remove old undo log entries from tail) B) missing information about virtual columns (we don't log values of virtual columns to undo log if they had not changed)
I'll explain our approach to these two problems in comments at the place they are handled.
bool row_vers_must_preserve_del_marked | ( | trx_id_t | trx_id, |
const table_name_t & | name, | ||
mtr_t * | mtr | ||
) |
Finds out if we must preserve a delete marked earlier version of a clustered index record, because it is >= the purge view.
[in] | trx_id | Transaction id in the version |
[in] | name | Table name |
[in,out] | mtr | Mini-transaction holding the latch on the clustered index record; it will also hold the latch on purge_view |
|
static |
Check whether all non-virtual columns in the two index entries match.
[in] | index | the secondary index |
[in] | ientry1 | first index entry to compare |
[in] | ientry2 | second index entry to compare |
[in,out] | n_non_v_col | number of non-virtual columns in the index |
bool row_vers_old_has_index_entry | ( | bool | also_curr, |
const rec_t * | rec, | ||
mtr_t * | mtr, | ||
dict_index_t * | index, | ||
const dtuple_t * | ientry, | ||
roll_ptr_t | roll_ptr, | ||
trx_id_t | trx_id | ||
) |
Finds out if a version of the record, where the version >= the current purge view, should have ientry as its secondary index entry.
We check if there is any not delete marked version of the record where the trx id >= purge view, and the secondary index entry and ientry compare as equal in the alphabetical ordering; exactly in this case we return true.
also_curr | in: true if also rec is included in the versions to search; otherwise only versions prior to it are searched |
rec | in: record in the clustered index; the caller must have a latch on the page |
mtr | in: mtr holding the latch on rec; it will also hold the latch on purge_view |
index | in: the secondary index |
ientry | in: the secondary index entry |
roll_ptr | in: roll_ptr for the purge record |
trx_id | in: transaction ID on the purging record |
|
static |
Check whether a virtual column value in the secondary virtual index matches that of the current cluster index record, which is recreated from information stored in the undo log.
[in] | in_purge | called by purge thread |
[in] | rec | record in the clustered index |
[in] | icentry | the index entry built from a cluster row |
[in] | clust_index | cluster index |
[in] | clust_offsets | offsets on the cluster record |
[in] | index | the secondary index |
[in] | ientry | the secondary index entry |
[in] | roll_ptr | the rollback pointer for the purging record |
[in] | trx_id | trx id for the purging record |
[in,out] | v_heap | heap used to build virtual dtuple |
[in,out] | vrow | dtuple holding the virtual rows (if needed) |
[in] | mtr | mtr holding the latch on rec |