MySQL 9.3.0
Source Code Documentation
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages Concepts
trx0sys.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 1996, 2025, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/trx0sys.h
29 Transaction system
30
31 Created 3/26/1996 Heikki Tuuri
32 *******************************************************/
33
34#ifndef trx0sys_h
35#define trx0sys_h
36
37#include "univ.i"
38
39#include "buf0buf.h"
40#include "fil0fil.h"
41#include "trx0types.h"
42#ifndef UNIV_HOTBACKUP
43#include "mem0mem.h"
44#include "mtr0mtr.h"
45#include "page0types.h"
46#include "ut0byte.h"
47#include "ut0class_life_cycle.h"
48#include "ut0guarded.h"
49#include "ut0lst.h"
50#include "ut0mutex.h"
51#endif /* !UNIV_HOTBACKUP */
52#include <atomic>
53#include <unordered_map>
54#include <vector>
55#include "trx0trx.h"
56
57#ifndef UNIV_HOTBACKUP
58
59// Forward declaration
60class MVCC;
61class ReadView;
62
63/** The transaction system */
64extern trx_sys_t *trx_sys;
65
66/** Checks if a page address is the trx sys header page.
67@param[in] page_id page id
68@return true if trx sys header page */
69static inline bool trx_sys_hdr_page(const page_id_t &page_id);
70
71/** Creates and initializes the central memory structures for the transaction
72 system. This is called when the database is started.
73 @return min binary heap of rsegs to purge */
75/** Creates the trx_sys instance and initializes purge_queue and mutex. */
76void trx_sys_create(void);
77/** Creates and initializes the transaction system at the database creation. */
79
80/** Find the page number in the TRX_SYS page for a given slot/rseg_id
81@param[in] rseg_id slot number in the TRX_SYS page rseg array
82@return page number from the TRX_SYS page rseg array */
84
85/** Look for a free slot for a rollback segment in the trx system file copy.
86@param[in,out] mtr mtr
87@return slot index or ULINT_UNDEFINED if not found */
89
90/** Gets a pointer to the transaction system file copy and x-locks its page.
91 @return pointer to system file copy, page x-locked */
92static inline trx_sysf_t *trx_sysf_get(mtr_t *mtr); /*!< in: mtr */
93
94/** Gets the space of the nth rollback segment slot in the trx system
95file copy.
96@param[in] sys_header trx sys file copy
97@param[in] i slot index == rseg id
98@param[in] mtr mtr
99@return space id */
101 ulint i, mtr_t *mtr);
102
103/** Gets the page number of the nth rollback segment slot in the trx system
104file copy.
105@param[in] sys_header trx sys file copy
106@param[in] i slot index == rseg id
107@param[in] mtr mtr
108@return page number, FIL_NULL if slot unused */
110 ulint i, mtr_t *mtr);
111
112/** Sets the space id of the nth rollback segment slot in the trx system
113file copy.
114@param[in] sys_header trx sys file copy
115@param[in] i slot index == rseg id
116@param[in] space space id
117@param[in] mtr mtr */
118static inline void trx_sysf_rseg_set_space(trx_sysf_t *sys_header, ulint i,
119 space_id_t space, mtr_t *mtr);
120
121/** Set the page number of the nth rollback segment slot in the trx system
122file copy.
123@param[in] sys_header trx sys file copy
124@param[in] i slot index == rseg id
125@param[in] page_no page number, FIL_NULL if the slot is reset to
126 unused
127@param[in] mtr mtr */
128static inline void trx_sysf_rseg_set_page_no(trx_sysf_t *sys_header, ulint i,
129 page_no_t page_no, mtr_t *mtr);
130
131/** Allocates a new transaction id (for trx->id). Before calling,
132the trx_sys_mutex must be acquired.
133@return new, allocated trx id */
135
136/** Allocates a new transaction number (for trx->no). Before calling,
137the trx_sys_serialisation_mutex must be acquired.
138@return new, allocated trx no */
140
141/** Retrieves a next value that will be allocated if trx_sys_allocate_trx_id()
142or trx_sys_allocate_trx_id_trx_no() was called.
143@return the next trx->id or trx->no that will be allocated */
145
146#ifdef UNIV_DEBUG
147/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
148extern uint trx_rseg_n_slots_debug;
149#endif
150#endif /* !UNIV_HOTBACKUP */
151
152/** Writes a trx id to an index page. In case that the id size changes in some
153future version, this function should be used instead of mach_write_...
154@param[in] ptr pointer to memory where written
155@param[in] id id */
156static inline void trx_write_trx_id(byte *ptr, trx_id_t id);
157
158#ifndef UNIV_HOTBACKUP
159/** Reads a trx id from an index page. In case that the id size changes in
160 some future version, this function should be used instead of
161 mach_read_...
162 @return id */
164 const byte *ptr); /*!< in: pointer to memory from where to read */
165
166/** Checks if a rw transaction with the given id is active.
167Please note, that positive result means only that the trx was active
168at some moment during the call, but it might have already become
169TRX_STATE_COMMITTED_IN_MEMORY before the call returns to the caller, as this
170transition is protected by trx->mutex and Trx_shard's mutex, but it is
171impossible for the caller to hold any of these mutexes when calling this
172function as the function itself internally acquires Trx_shard's mutex which
173would cause recurrent mutex acquisition if caller already had the same mutex,
174or latching order violation in case of holding trx->mutex.
175@param[in] trx_id trx id of the transaction
176@param[in] do_ref_count if true then increment the trx_t::n_ref_count
177@return transaction instance if active, or NULL; */
178static inline trx_t *trx_rw_is_active(trx_id_t trx_id, bool do_ref_count);
179
180/** Persist transaction number limit below which all transaction GTIDs
181are persisted to disk table.
182@param[in] gtid_trx_no transaction number */
183void trx_sys_persist_gtid_num(trx_id_t gtid_trx_no);
184
185/** @return oldest transaction number yet to be committed. */
187
188/** Get a list of all binlog prepared transactions.
189@param[out] trx_ids all prepared transaction IDs. */
190void trx_sys_get_binlog_prepared(std::vector<trx_id_t> &trx_ids);
191
192/** Get current binary log positions stored.
193@param[out] file binary log file name
194@param[out] offset binary log file offset */
195void trx_sys_read_binlog_position(char *file, uint64_t &offset);
196
197/** Update binary log position if not already updated. This is called
198by clone to update any stale binary log position if any transaction
199is yet to update the binary log position in SE.
200@param[in] last_file last noted binary log file name
201@param[in] last_offset last noted binary log offset
202@param[in] file current binary log file name
203@param[in] offset current binary log file offset
204@return true, if binary log position is updated with current. */
205bool trx_sys_write_binlog_position(const char *last_file, uint64_t last_offset,
206 const char *file, uint64_t offset);
207
208/** Updates the offset information about the end of the MySQL binlog entry
209which corresponds to the transaction being committed, external XA transaction
210being prepared or rolled back. In a MySQL replication slave updates the latest
211master binlog position up to which replication has proceeded.
212@param[in] trx Current transaction
213@param[in,out] mtr Mini-transaction for update */
215
216/** Shutdown/Close the transaction system. */
217void trx_sys_close(void);
218
219/** Determine if there are incomplete transactions in the system.
220@return whether incomplete transactions need rollback */
221static inline bool trx_sys_need_rollback();
222
223/** Reads number of recovered transactions which have state
224equal to TRX_STATE_ACTIVE (so are not prepared transactions).
225@return number of active recovered transactions */
227
228/** Validates lists of transactions at the very beginning of the
229pre-dd-shutdown phase. */
231
232/** Validates lists of transactions at the very end of the
233pre-dd-shutdown phase. */
235
236/** Validates lists of transactions after all background threads
237of InnoDB exited during shutdown of MySQL. */
239
240/** Add the transaction to the RW transaction set.
241@param trx transaction instance to add */
242static inline void trx_sys_rw_trx_add(trx_t *trx);
243
244#endif /* !UNIV_HOTBACKUP */
245
246#ifdef UNIV_DEBUG
247/** Validate the trx_sys_t::rw_trx_list.
248 @return true if the list is valid */
250#endif /* UNIV_DEBUG */
251
252/** Initialize trx_sys_undo_spaces, called once during srv_start(). */
254
255/** Free the resources occupied by trx_sys_undo_spaces,
256called once during thread de-initialization. */
258
259/** The automatically created system rollback segment has this id */
260constexpr uint32_t TRX_SYS_SYSTEM_RSEG_ID = 0;
261
262/** The offset of the transaction system header on the page */
263constexpr uint32_t TRX_SYS = FSEG_PAGE_DATA;
264
265/** Transaction system header */
266/*------------------------------------------------------------- @{ */
267/** the maximum trx id or trx number modulo TRX_SYS_TRX_ID_UPDATE_MARGIN written
268 to a file page by any transaction; the assignment of transaction ids
269 continues from this number rounded up by TRX_SYS_TRX_ID_UPDATE_MARGIN plus
270 TRX_SYS_TRX_ID_UPDATE_MARGIN when the database is started */
271constexpr uint32_t TRX_SYS_TRX_ID_STORE = 0;
272/** segment header for the tablespace segment the trx system is created into */
273constexpr uint32_t TRX_SYS_FSEG_HEADER = 8;
274/** the start of the array of rollback segment specification slots */
275constexpr uint32_t TRX_SYS_RSEGS = 8 + FSEG_HEADER_SIZE;
276/*------------------------------------------------------------- @} */
277
278/* Originally, InnoDB defined TRX_SYS_N_RSEGS as 256 but created only one
279rollback segment. It initialized some arrays with this number of entries.
280We must remember this limit in order to keep file compatibility. */
281constexpr size_t TRX_SYS_OLD_N_RSEGS = 256;
282
283/* The system temporary tablespace was originally allocated rseg_id slot
284numbers 1 through 32 in the TRX_SYS page. But those slots were not used
285because those Rollback segments were recreated at startup and after any
286crash. These slots are now used for redo-enabled rollback segments.
287The default number of rollback segments in the temporary tablespace
288remains the same. */
289constexpr size_t TRX_SYS_OLD_TMP_RSEGS = 32;
290
291/** Maximum length of MySQL binlog file name, in bytes. */
292constexpr uint32_t TRX_SYS_MYSQL_LOG_NAME_LEN = 512;
293/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
294constexpr uint32_t TRX_SYS_MYSQL_LOG_MAGIC_N = 873422344;
295
296static_assert(UNIV_PAGE_SIZE_MIN >= 4096, "UNIV_PAGE_SIZE_MIN < 4096");
297/** The offset of the MySQL binlog offset info in the trx system header */
298#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
299/** magic number which is TRX_SYS_MYSQL_LOG_MAGIC_N if we have valid data in the
300 MySQL binlog info */
301constexpr uint32_t TRX_SYS_MYSQL_LOG_MAGIC_N_FLD = 0;
302/** high 4 bytes of the offset within that file */
303constexpr uint32_t TRX_SYS_MYSQL_LOG_OFFSET_HIGH = 4;
304/** low 4 bytes of the offset within that file */
305constexpr uint32_t TRX_SYS_MYSQL_LOG_OFFSET_LOW = 8;
306/** MySQL log file name */
307constexpr uint32_t TRX_SYS_MYSQL_LOG_NAME = 12;
308
309/** Reserve next 8 bytes for transaction number up to which GTIDs
310are persisted to table */
311#define TRX_SYS_TRX_NUM_GTID \
312 (TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME + TRX_SYS_MYSQL_LOG_NAME_LEN)
313#define TRX_SYS_TRX_NUM_END (TRX_SYS_TRX_NUM_GTID + 8)
314/** Doublewrite buffer */
315/** @{ */
316/** The offset of the doublewrite buffer header on the trx system header page */
317#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
318/*-------------------------------------------------------------*/
319/** fseg header of the fseg containing the doublewrite buffer */
320constexpr uint32_t TRX_SYS_DOUBLEWRITE_FSEG = 0;
321/** 4-byte magic number which shows if we already have created the doublewrite
322 buffer */
324/** page number of the first page in the first sequence of 64 (=
325 FSP_EXTENT_SIZE) consecutive pages in the doublewrite buffer */
327/** page number of the first page in the second sequence of 64 consecutive pages
328 in the doublewrite buffer */
330/** we repeat TRX_SYS_DOUBLEWRITE_MAGIC, TRX_SYS_DOUBLEWRITE_BLOCK1,
331 TRX_SYS_DOUBLEWRITE_BLOCK2 so that if the trx sys header is half-written to
332 disk, we still may be able to recover the information */
333constexpr uint32_t TRX_SYS_DOUBLEWRITE_REPEAT = 12;
334/** If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
335we must reset the doublewrite buffer, because starting from 4.1.x the
336space id of a data page is stored into
337FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
339
340/*-------------------------------------------------------------*/
341/** Contents of TRX_SYS_DOUBLEWRITE_MAGIC */
342constexpr uint32_t TRX_SYS_DOUBLEWRITE_MAGIC_N = 536853855;
343/** Contents of TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED */
344constexpr uint32_t TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N = 1783657386;
345
346/** Size of the doublewrite block in pages */
347#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
348/** @} */
349
350/** List of undo tablespace IDs. */
351class Space_Ids : public std::vector<space_id_t, ut::allocator<space_id_t>> {
352 public:
353 void sort() { std::sort(begin(), end()); }
354
356 if (size() == 0) {
357 return (false);
358 }
359
360 iterator it = std::find(begin(), end(), id);
361
362 return (it != end());
363 }
364
365 iterator find(space_id_t id) { return (std::find(begin(), end(), id)); }
366};
367
368/** Number of shards created for transactions. */
369constexpr size_t TRX_SHARDS_N = 256;
370
371/** Computes shard number for a given trx_id.
372@param[in] trx_id trx_id for which shard_no should be computed
373@return the computed shard number (number in range 0..TRX_SHARDS_N-1) */
374inline size_t trx_get_shard_no(trx_id_t trx_id) {
375 ut_ad(trx_id != 0);
376 return trx_id % TRX_SHARDS_N;
377}
378
379#ifndef UNIV_HOTBACKUP
382 size_t operator()(const trx_id_t &key) const {
383 return static_cast<size_t>(key / TRX_SHARDS_N);
384 }
385 };
386
387 using By_id = std::unordered_map<trx_id_t, trx_t *, Trx_track_hash>;
389
390 /** For observers which use Trx_shard::mutex protection: each transaction id
391 in the m_by_id is guaranteed to be at least m_min_id.
392 Writes are protected with Trx_shard::mutex.
393 Reads can be performed without any latch before accessing m_by_id,
394 but care must be taken to interpret the result -
395 @see trx_rw_is_active for details.*/
396 std::atomic<trx_id_t> m_min_id{TRX_ID_MAX};
397
398 /** A "lower bound" is a value which is guaranteed to be smaller or equal than
399 any id in any of the shards of transactions which finished calling insert(id)
400 and have not yet started a call to erase(id).
401 I.e. a shard can already contain an id smaller than this value, if insert(id)
402 has still not finished. This is sufficient guarantee, if you only care about
403 "active" transactions in the sense that insert(id) for them has happened
404 before the call and erase(id) hasn't started. For example, when you want to
405 check if a record you look at could have been modified by any of active
406 transactions, then this is a valid assumption as creating a record happens
407 after insert(id).
408
409 Each of the two values in this array may be used as a lower bound if its
410 highest bit (UPDATING_LOWER_BOUND) is not set. At most one of them has the
411 highest bit set at any given time. If both of them can be used as a lower
412 bound, it follows, that you can use maximum of the two, as the best lower
413 bound estimate. Note that this value may be way lower than actual minimum, as
414 it is only updated from time to time by
415 get_better_lower_bound_for_already_active_id(). The highest bit being set for
416 a given entry means it is currently undergoing the process of updating and its
417 value should not be used until it is finished.
418
419 Transactions performing insert(id) should ensure that for each i=0,1:
420 (s_lower_bound[i]&~UPDATING_LOWER_BOUND) <= id
421
422 The property we wish to prove is:
423
424 Claim 1: The value returned by get_cheap_lower_bound_for_already_active_id()
425 is lower or equal to any trx_id for which return from insert(trx->id) has
426 happened-before the call to get_cheap_lower_bound_for_already_active_id()
427 has started and a call to erase(trx->id) (if any at all) will happen-after
428 the return from get_cheap_lower_bound_for_already_active_id().
429
430 Note how weak this property is: it's the burden of the person who wants to
431 use this Claim 1 for anything, to first establish the happens-before relations
432 by other means, and only then the Claim 1 can do any useful work at all. The
433 reason we need only such a weak claim, is because in practice we use
434 get_cheap_lower_bound_for_already_active_id() only when already having in our
435 hands a record which is a proof of activity of some transaction which must
436 have happened after it has already called insert(trx->id) successfully - this
437 establishes the needed happens-before relation: insert(trx->id) happens
438 before a write of the record, which happens-before our read, which happens
439 before the call to get_cheap_lower_bound_for_already_active_id(). As for the
440 quite strong requirement for erase() to happen-after, it is justified, because
441 it is fine for get_cheap_lower_bound_for_already_active_id() to ignore a trx
442 which is "in the middle" of erase(trx->id), as it means the trx is already
443 committing, and the call to erase() happens under shard mutex and after the
444 state was already changed to TRX_STATE_COMMITTED_IN_MEMORY under trx->mutex,
445 so whoever else is interested in the question "is trx active?" and asks it
446 under shard mutex or trx->mutex will arrive at "no", so if we answer "no" as
447 well, there's no discrepancy, and if we answer "yes", then we err on the safe
448 side which is fine as well, as the caller will then double check.
449 Hence we don't care about cases where erase(trx->id) has already started.
450
451 The proof of Claim 1, depends on a simpler claim:
452
453 Claim 2: for any moment which happens-after insert(trx_id) and happens-before
454 erase(trx_id), the value of s_lower_bound[i] (for each i) either has the
455 UPDATING_LOWER_BOUND flag or is lower-or-equal to trx_id.
456
457 Claim 2 implies Claim 1, because get_cheap_lower_bound_for_already_active_id()
458 returns a value which it loaded from s_lower_bound[i] and had no
459 UPDATING_LOWER_BOUND flag.
460
461 In what follows, it is important that all atomic operations (load, store,
462 compare_exchange_weak, fetch_xor) use memory_order::seq_cst ordering, so there
463 is a single order S in which all of them happen, which is consistent with the
464 happens-before relation (established in the Claim's assumptions, and by
465 shard's mutex).
466
467 The proof of Claim 2 is constructive. For each trx shard separately, we use
468 induction over the trx_id-s in the order they are insert()ed to the shard
469 (which happen under shard.active_rw_trxs.mutex, so are ordered by
470 happens-before, too).
471
472 The thread which does insert(trx_id) experiences following events in following
473 order:
474 1. shard.m_min_id.load()
475 Case A) saw shard.m_min_id <= trx_id and did nothing
476 Case B) saw shard.m_min_id > trx_id and did:
477 2. stored shard.m_min_id = trx_id
478 3. saw that s_lower_bound[i] <= trx_id holds already, OR enforced this
479 inequality by itself by modifying s_lower_bound[i] with CAS
480
481 As all operations on shard.m_min_id and s_lower_bound[i] are ordered in S, it
482 is meaningful to look at the sorted list of all the operations from "2.", to
483 the moment get_cheap_lower_bound_for_already_active_id() load()s the value
484 from it. There are only few ways in which s_lower_bound[i] can be modified:
485
486 a) calls to limit_to(s_lower_bound[i], x) from insert(x) or
487 get_better_lower_bound_for_already_active_id(). They can only make the
488 value smaller than it was
489
490 b) s_lower_bound[index_to_update].store(min_seen |UPDATING_LOWER_BOUND)
491 in get_better_lower_bound_for_already_active_id() which can make it
492 (let's say: arbitrarily) larger, but also sets the flag
493
494 c) s_lower_bound[index_to_update].fetch_xor(UPDATING_LOWER_BOUND) which
495 clears the flag, but doesn't change the lower 63 bits
496
497 So, we have a sequence of operations of these three types. Also (b)s which
498 sets the flag and (c)s which clear it, appear in alternating fashion, so
499 that it is meaningful to talk about periods when the flag is present and
500 those where it is not present.
501
502 We will show that no matter which Case, A) or B), occurred, Claim 2 will hold.
503
504 Case A) - the m_min_id was already small so we did nothing.
505
506 There are two interesting sub-cases to consider:
507
508 A.I) the "1." falls between (b) and (c), i.e. when the flag was present
509
510 Here, we need to distinguish two sub-sub-cases:
511
512 A.I.1) the "1." happens-before the second for() loop in
513 get_better_lower_bound_for_already_active_id() does
514 trx_sys->shards[i].active_rw_trxs.peek().min_id().
515
516 In this case, we are fine, because from moment "1." onwards it holds that
517 m_min_id <= trx_id, and thus it will be noticed, and taken into account
518 before (c) clears the flag. From then on, any later (a) can only make it
519 smaller, and future (b) will also happen after shard.m_min_id was already
520 as we need, so will take it into account.
521
522 A.I.2) the "1." happens-after the second for() loop loaded V from
523 shard.m_min_id.
524
525 If V <= trx_id, everything is fine, as the final value published by (c) will
526 be smaller or equal to trx_id.
527 If V > trx_id, and we've just seen at "1." that it was <= trx_id, it means it
528 somehow decreased between the load() done by the second for() loop, and load()
529 done in "1." in insert(trx_id) by us. The only places which decrease it, are
530 calls to insert(..), so it must be the case that another insert(trx_id') has
531 happened for trx_id'<=trx_id which decreased the m_min_id, and executed the
532 logic for limit_to(s_lower_bound[i],trx_id'), before finishing insert(trx_id')
533 which happened before moment "1.", which in turn happened before (c), which
534 means, that the lower 63 bits of s_lower_bound[i] are <= trx_id' <= trx_id
535 already at moment "1.", and thus the value revealed by (c) will be fine.
536
537 A.II) the "1." falls outside of any (b)--(c) window, i.e. when the flag was
538 missing
539
540 This is easy case, as at "1." we saw m_min_id <= trx_id, which means (thanks
541 to shard's mutex) that insert(m_min_id) has happened-before "1." and
542 erase(m_min_id) has not yet happened, so we can use inductive assumption, to
543 show the s_lower_bound[i] had to be <= m_min_id at moment "1." as the flag
544 was not present, and thus it is also <= trx_id, and will stay like that
545 through all (a) operations, and any future (b) operation will happen after
546 we've already ensured shard's m_min_id <= trx_id, so will be noticed by scan
547 over shards, before doing (c) to clear the flag.
548
549
550 Case B) the m_min_id was too large, so we did "2." and "3."
551
552 There are two interesting sub-cases to consider:
553
554 B.I) the "3." falls between (b) and (c), i.e. when the flag was present
555
556 It means the lower 63 bits are already <= trx_id at moment "3.", and will
557 stay so until (c), which will only clear the flag. Operation (a) also can't
558 make it larger. So, it is only another (b) in future which could make it
559 larger, but that future (b) will happen after "2." which ensured shard's
560 m_min_id is <= trx_id, and any thread doing (b)--(c) end-to-end has to check
561 all the shards, to take them into account.
562
563 B.II) the "3." falls outside of any (b)--(c) window, i.e. when the flag was
564 missing
565
566 Here, similarly, at moment "3." the s_lower_bound[i] is <= trx_id, and
567 subsequent (a)s can't violate it. Also, if (b) sets the flag in future, then
568 it will only be cleared by (c) after scanning all shards, which will happen
569 after moment "2.", so will take the shard's m_min_id into account.
570
571 So, Claim 2 holds in all these cases.
572 */
573 static std::atomic<trx_id_t> s_lower_bound[2];
574 static constexpr trx_id_t UPDATING_LOWER_BOUND = trx_id_t{1} << 63;
575
576 /** This is used during get_better_lower_bound_for_already_active_id() to
577 announce that it is trying to establish new value for s_lower_bound.
578 This value is false if no such process is under way, and changed to true by
579 the only thread chosen to perform it, thus serves the purpose of "mutex".
580 The reason we don't use an std::mutex, is that we don't wish to wait, nor
581 spin, we just want to give up when somebody else already works on it. */
582 static std::atomic<bool> s_updating_lower_bound;
583 /** Performs an equivalent of if(upper_bound < a) a=upper_bound atomically,
584 ignoring, but preserving the UPDATING_LOWER_BOUND flag.
585 @param[in] a The atomic we want to limit to upper_bound
586 @param[in] upper_bound The upper_bound we want to impose on a */
587 static void limit_to(std::atomic<trx_id_t> &a, trx_id_t upper_bound) {
588 trx_id_t v = a.load();
589 while (
590 upper_bound < (v & ~UPDATING_LOWER_BOUND) &&
591 !a.compare_exchange_weak(v, upper_bound | (v & UPDATING_LOWER_BOUND))) {
592 }
593 }
594
595 public:
596 /** Returns a value which is lower or equal to id of any transaction
597 for which insert(id) happened before the call started, and erase(id)
598 has not happened before the start of the call. @see s_lower_bound
599 Note that this value never increases unless someone calls
600 @see get_better_lower_bound_for_already_active_id() */
602 trx_id_t best_bound = 0;
603 bool found = false;
604 /* The while loop handles a rare race condition where we observe
605 both entries as having UPDATING_LOWER_BOUND, because first one
606 was being updated then the later. */
607 while (!found) {
608 for (const auto &lower_bound : s_lower_bound) {
609 const auto val = lower_bound.load();
610 if (!(val & UPDATING_LOWER_BOUND)) {
611 found = true;
612 /* Any s_lower_bound which doesn't have the UPDATING_LOWER_BOUND flag
613 is correct, so we prefer to take the larger one. For an exhaustive
614 proof see s_lower_bound's doxygen. */
615 best_bound = std::max(best_bound, val);
616 }
617 }
618 }
619 return best_bound;
620 }
621 /** @see get_cheap_lower_bound_for_already_active_id() from which this
622 function differs by executing a tighter estimation. If it is indeed
623 better, then as a side effect it will bump the value of s_lower_bound
624 used by get_cheap_lower_bound_for_already_active_id()*/
626
627 By_id const &by_id() const { return m_by_id; }
628 trx_id_t min_id() const { return m_min_id.load(); }
629 trx_t *get(trx_id_t trx_id) const {
630 const auto it = m_by_id.find(trx_id);
631 trx_t *trx = it == m_by_id.end() ? nullptr : it->second;
632 /* We remove trx from active_rw_trxs and change state to
633 TRX_STATE_COMMITTED_IN_MEMORY in a same critical section protected by
634 Trx_shard's mutex, which we happen to hold here, so we expect the state
635 of trx to match its presence in that set */
636 ut_ad(trx == nullptr || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
637 return trx;
638 }
639 void insert(trx_t &trx) {
640 const trx_id_t trx_id = trx.id;
641 ut_ad(0 == m_by_id.count(trx_id));
642 m_by_id.emplace(trx_id, &trx);
643 if (trx_id < m_min_id.load(std::memory_order_relaxed)) {
644 /* It matters that our m_min_id.store() is made visible before we load any
645 of the two s_lower_bound[i]!
646 If m_min_id.store() was using just memory_order_release, it could happen,
647 that s_lower_bound[i].load() will appear small, so we will not limit it,
648 but right after that another thread starts updating s_lower_bound[i], and
649 will not see our stored m_min_id, and thus will set s_lower_bound[i], to
650 too large value. */
651 m_min_id.store(trx_id);
652 /* Ensure that both s_lower_bound[i]&~UPDATING_LOWER_BOUND are <= trx_id,
653 preserving the UPDATING_LOWER_BOUND flag if present */
654 for (auto &lower_bound : s_lower_bound) {
655 limit_to(lower_bound, trx_id);
656 }
657 }
658 }
659 void erase(trx_id_t trx_id) {
660 ut_ad(1 == m_by_id.count(trx_id));
661 m_by_id.erase(trx_id);
662 if (m_min_id.load(std::memory_order_relaxed) == trx_id) {
663 if (m_by_id.empty()) {
664 /* Note that this value is not equal to shard id modulo TRX_SHARDS_N,
665 and that changing to TRX_ID_MAX back and forth means the m_min_id is
666 not monotone over time. None of this is really a requirement for the
667 solution to work correctly, and m_min_id was never guaranteed to be
668 monotone really, as ids passed to insert(id) are not monotone. */
669 m_min_id.store(TRX_ID_MAX, std::memory_order_release);
670 } else {
671 /* We want at most 1 release store, so we use a local variable for the
672 loop. The m_by_id isn't ordered, so we find the min value by iterating
673 over all possible values in this shard. We know we will find something
674 eventually, because the shard is not empty, and we start the loop from
675 its old minimum. The number of iterations in total life of the
676 application is in practice roughly equal to the number of transactions,
677 because we visit each candidate value at most once, usually. There's an
678 edge case though: the ids passed to insert(id) are not necessarily
679 monotonically increasing, as ids are assigned independently from
680 inserting them - even though the two operations are close to each other
681 in source, the operations from two threads can get interleaved in a way
682 which makes the new minimum smaller - this is not only rare, but also
683 the range of such disorder is rather short, thus this doesn't impact
684 performance as at most just a few candidate values are rechecked. */
685 trx_id_t new_min = trx_id + TRX_SHARDS_N;
686#ifdef UNIV_DEBUG
687 // These asserts ensure while loop terminates:
688 const trx_id_t some_id = m_by_id.begin()->first;
689 ut_a(new_min <= some_id);
690 ut_a((some_id - new_min) % TRX_SHARDS_N == 0);
691#endif /* UNIV_DEBUG */
692 while (m_by_id.count(new_min) == 0) {
693 new_min += TRX_SHARDS_N;
694 }
695 m_min_id.store(new_min, std::memory_order_release);
696 }
697 }
698 }
699};
700
701/** Shard for subset of transactions. */
702struct Trx_shard {
703 /** Mapping from trx->id to trx of active rw transactions.
704 The peek() interface can only be used safely for the min_id().
705 Use latch_and_execute() interface to access other members. */
708};
709
710/** The transaction system central memory data structure. */
711struct trx_sys_t {
712 /* Members protected by neither trx_sys_t::mutex nor serialisation_mutex. */
714
715 /** @{ */
716
717 /** Multi version concurrency control manager */
718
720
721 /** Vector of pointers to rollback segments. These rsegs are iterated
722 and added to the end under a read lock. They are deleted under a write
723 lock while the vector is adjusted. They are created and destroyed in
724 single-threaded mode. */
726
727 /** Vector of pointers to rollback segments within the temp tablespace;
728 This vector is created and destroyed in single-threaded mode so it is not
729 protected by any mutex because it is read-only during multi-threaded
730 operation. */
732
733 /** Length of the TRX_RSEG_HISTORY list (update undo logs for committed
734 transactions). */
735 std::atomic<uint64_t> rseg_history_len;
736
737 /** @} */
738
739 /* Members protected by either trx_sys_t::mutex or serialisation_mutex. */
741
742 /** @{ */
743
744 /** The smallest number not yet assigned as a transaction id
745 or transaction number. This is declared as atomic because it
746 can be accessed without holding any mutex during AC-NL-RO
747 view creation. When it is used for assignment of the trx->id,
748 it is synchronized by the trx_sys_t::mutex. When it is used
749 for assignment of the trx->no, it is synchronized by the
750 trx_sys_t::serialisation_mutex. Note: it might be in parallel
751 used for both trx->id and trx->no assignments (for different
752 trx_t objects). */
753 std::atomic<trx_id_t> next_trx_id_or_no;
754
755 /** @} */
756
757 /* Members protected by serialisation_mutex. */
759
760 /** @{ */
761
762 /** Mutex to protect serialisation_list. */
764
765 /** Tracks minimal transaction id which has received trx->no, but has
766 not yet finished commit for the mtr writing the trx commit. Protected
767 by the serialisation_mutex. Ordered on the trx->no field. */
768 UT_LIST_BASE_NODE_T(trx_t, no_list) serialisation_list;
769
770#ifdef UNIV_DEBUG
771 /** Max trx number of read-write transactions added for purge. */
773#endif /* UNIV_DEBUG */
774
776
777 /** The minimum trx->no inside the serialisation_list. Protected by
778 the serialisation_mutex. Might be read without the mutex. */
779 std::atomic<trx_id_t> serialisation_min_trx_no;
780
781 /** @} */
782
783 /* Members protected by the trx_sys_t::mutex. */
785
786 /** @{ */
787
788 /** Mutex protecting most fields in this structure (the default one). */
790
792
793 /** List of active and committed in memory read-write transactions, sorted
794 on trx id, biggest first. Recovered transactions are always on this list. */
795 UT_LIST_BASE_NODE_T(trx_t, trx_list) rw_trx_list;
796
798
799 /** List of transactions created for MySQL. All user transactions are
800 on mysql_trx_list. The rw_trx_list can contain system transactions and
801 recovered transactions that will not be in the mysql_trx_list.
802 Additionally, mysql_trx_list may contain transactions that have not yet
803 been started in InnoDB. */
804 UT_LIST_BASE_NODE_T(trx_t, mysql_trx_list) mysql_trx_list;
805
806 /** Array of Read write transaction IDs for MVCC snapshot. A ReadView would
807 take a snapshot of these transactions whose changes are not visible to it.
808 We should remove transactions from the list before committing in memory and
809 releasing locks to ensure right order of removal and consistent snapshot. */
811
813
814 /** Mapping from transaction id to transaction instance. */
816
817 /** Number of transactions currently in the XA PREPARED state. */
819
820 /** True if XA PREPARED trxs are found. */
822
823 /** @} */
824
826
828 return trx_sys->shards[trx_get_shard_no(trx_id)];
829 }
830 template <typename F>
832 const ut::Location &loc) {
833 return get_shard_by_trx_id(trx_id).active_rw_trxs.latch_and_execute(
834 [&](Trx_by_id_with_min &trx_by_id_with_min) {
835 return std::forward<F>(f)(trx_by_id_with_min.get(trx_id));
836 },
837 loc);
838 }
839};
840
841#endif /* !UNIV_HOTBACKUP */
842
843/** A list of undo tablespace IDs found in the TRX_SYS page.
844This cannot be part of the trx_sys_t object because it is initialized before
845that object is created. These are the old type of undo tablespaces that do not
846have space_IDs in the reserved range nor contain an RSEG_ARRAY page. */
848
849#ifndef UNIV_HOTBACKUP
850
851/** When a trx id which is zero modulo this number (which must be a power of
852two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
853page is updated. */
855
856/** Acquire the trx_sys->mutex through the mutex_enter() macro. */
857static inline void trx_sys_mutex_enter() { mutex_enter(&trx_sys->mutex); }
858
859/** Release the trx_sys->mutex by calling its exit() method. */
860static inline void trx_sys_mutex_exit() { trx_sys->mutex.exit(); }
861
862#ifdef UNIV_DEBUG
863
864/** Test if trx_sys->mutex is owned. Only compiled when UNIV_DEBUG is defined.
 @return the result of trx_sys->mutex.is_owned() */
865static inline bool trx_sys_mutex_own() { return trx_sys->mutex.is_owned(); }
866
867/** Test if trx_sys->serialisation_mutex is owned. */
869 return trx_sys->serialisation_mutex.is_owned();
870}
871#endif
872
873/** Acquire the trx_sys->serialisation_mutex. */
876}
877
878/** Release the trx_sys->serialisation_mutex. */
881}
882
883#endif /* !UNIV_HOTBACKUP */
884
885#include "trx0sys.ic"
886
887#endif
uint32_t space_id_t
Tablespace identifier.
Definition: api0api.h:48
uint32_t page_no_t
Page number.
Definition: api0api.h:46
Kerberos Client Authentication nullptr
Definition: auth_kerberos_client_plugin.cc:247
The database buffer pool high-level routines.
The MVCC read view manager.
Definition: read0read.h:44
Read view lists the trx ids of those transactions for which a consistent read should not see the modi...
Definition: read0types.h:48
This is a wrapper for a std::vector of trx_rseg_t object pointers.
Definition: trx0types.h:331
List of undo tablespace IDs.
Definition: trx0sys.h:351
void sort()
Definition: trx0sys.h:353
bool contains(space_id_t id)
Definition: trx0sys.h:355
iterator find(space_id_t id)
Definition: trx0sys.h:365
Definition: trx0sys.h:380
std::atomic< trx_id_t > m_min_id
For observers which use Trx_shard::mutex protection: each transaction id in the m_by_id is guaranteed...
Definition: trx0sys.h:396
trx_t * get(trx_id_t trx_id) const
Definition: trx0sys.h:629
static std::atomic< trx_id_t > s_lower_bound[2]
A "lower bound" is a value which is guaranteed to be smaller than or equal to any id in any of the shard...
Definition: trx0sys.h:573
By_id const & by_id() const
Definition: trx0sys.h:627
trx_id_t min_id() const
Definition: trx0sys.h:628
static constexpr trx_id_t UPDATING_LOWER_BOUND
Definition: trx0sys.h:574
static trx_id_t get_better_lower_bound_for_already_active_id()
Definition: trx0sys.cc:65
static std::atomic< bool > s_updating_lower_bound
This is used during get_better_lower_bound_for_already_active_id() to announce that it is trying to e...
Definition: trx0sys.h:582
static trx_id_t get_cheap_lower_bound_for_already_active_id()
Returns a value which is lower than or equal to the id of any transaction for which insert(id) happened before...
Definition: trx0sys.h:601
void erase(trx_id_t trx_id)
Definition: trx0sys.h:659
static void limit_to(std::atomic< trx_id_t > &a, trx_id_t upper_bound)
Performs an equivalent of if(upper_bound < a) a=upper_bound atomically, ignoring, but preserving the ...
Definition: trx0sys.h:587
void insert(trx_t &trx)
Definition: trx0sys.h:639
std::unordered_map< trx_id_t, trx_t *, Trx_track_hash > By_id
Definition: trx0sys.h:387
By_id m_by_id
Definition: trx0sys.h:388
Page identifier.
Definition: buf0types.h:207
The low-level file system.
constexpr uint32_t FSEG_PAGE_DATA
On a page of any file segment, data may be put starting from this offset.
Definition: fsp0types.h:79
constexpr uint32_t FSEG_HEADER_SIZE
Length of the file system header, in bytes.
Definition: fsp0types.h:94
#define F
Definition: jit_executor_value.cc:374
The memory management.
Mini-transaction buffer.
Definition: os0file.h:89
Container::const_iterator find(const Container &c, Value &&value)
Definition: generic.h:39
ValueType max(X &&first)
Definition: gtid.h:103
const char * begin(const char *const c)
Definition: base64.h:44
size_t size(const char *const c)
Definition: base64.h:46
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
constexpr size_t INNODB_CACHE_LINE_SIZE
CPU cache line size.
Definition: ut0cpu_cache.h:41
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2876
Index page routines.
required string key
Definition: replication_asynchronous_connection_failover.proto:60
Definition: trx0sys.h:381
size_t operator()(const trx_id_t &key) const
Definition: trx0sys.h:382
Shard for subset of transactions.
Definition: trx0sys.h:702
ut::Cacheline_padded< ut::Guarded< Trx_by_id_with_min, LATCH_ID_TRX_SYS_SHARD > > active_rw_trxs
Mapping from trx->id to trx of active rw transactions.
Definition: trx0sys.h:707
Mini-transaction handle and buffer.
Definition: mtr0mtr.h:177
The transaction system central memory data structure.
Definition: trx0sys.h:711
UT_LIST_BASE_NODE_T(trx_t, no_list) serialisation_list
Tracks minimal transaction id which has received trx->no, but has not yet finished commit for the mtr...
TrxSysMutex serialisation_mutex
Mutex to protect serialisation_list.
Definition: trx0sys.h:763
char pad3[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:775
UT_LIST_BASE_NODE_T(trx_t, mysql_trx_list) mysql_trx_list
List of transactions created for MySQL.
UT_LIST_BASE_NODE_T(trx_t, trx_list) rw_trx_list
List of active and committed in memory read-write transactions, sorted on trx id, biggest first.
std::atomic< trx_id_t > next_trx_id_or_no
The smallest number not yet assigned as a transaction id or transaction number.
Definition: trx0sys.h:753
std::atomic< uint64_t > rseg_history_len
Length of the TRX_RSEG_HISTORY list (update undo logs for committed transactions).
Definition: trx0sys.h:735
auto latch_and_execute_with_active_trx(trx_id_t trx_id, F &&f, const ut::Location &loc)
Definition: trx0sys.h:831
std::atomic< trx_id_t > serialisation_min_trx_no
Definition: trx0sys.h:779
Rsegs rsegs
Vector of pointers to rollback segments.
Definition: trx0sys.h:725
char pad7[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:812
char pad2[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:758
trx_id_t rw_max_trx_no
Max trx number of read-write transactions added for purge.
Definition: trx0sys.h:772
Rsegs tmp_rsegs
Vector of pointers to rollback segments within the temp tablespace; This vector is created and destro...
Definition: trx0sys.h:731
char pad4[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:784
char pad1[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:740
bool found_prepared_trx
True if XA PREPARED trxs are found.
Definition: trx0sys.h:821
MVCC * mvcc
Multi version concurrency control manager.
Definition: trx0sys.h:719
Trx_shard & get_shard_by_trx_id(trx_id_t trx_id)
Definition: trx0sys.h:827
Trx_shard shards[TRX_SHARDS_N]
Mapping from transaction id to transaction instance.
Definition: trx0sys.h:815
char pad5[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:791
ulint n_prepared_trx
Number of transactions currently in the XA PREPARED state.
Definition: trx0sys.h:818
TrxSysMutex mutex
Mutex protecting most fields in this structure (the default one).
Definition: trx0sys.h:789
char pad0[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:713
char pad_after[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:825
char pad6[ut::INNODB_CACHE_LINE_SIZE]
Definition: trx0sys.h:797
trx_ids_t rw_trx_ids
Array of Read write transaction IDs for MVCC snapshot.
Definition: trx0sys.h:810
Definition: trx0trx.h:675
trx_id_t id
transaction id
Definition: trx0trx.h:727
A utility wrapper class, which adds padding at the end of the wrapped structure, so that the next obj...
Definition: ut0cpu_cache.h:55
Definition: ut0core.h:36
Space_Ids * trx_sys_undo_spaces
A list of undo tablespace IDs found in the TRX_SYS page.
Definition: trx0sys.cc:810
constexpr size_t TRX_SYS_OLD_TMP_RSEGS
Definition: trx0sys.h:289
constexpr size_t TRX_SHARDS_N
Number of shards created for transactions.
Definition: trx0sys.h:369
constexpr trx_id_t TRX_SYS_TRX_ID_WRITE_MARGIN
When a trx id which is zero modulo this number (which must be a power of two) is assigned,...
Definition: trx0sys.h:854
static void trx_sysf_rseg_set_page_no(trx_sysf_t *sys_header, ulint i, page_no_t page_no, mtr_t *mtr)
Set the page number of the nth rollback segment slot in the trx system file copy.
constexpr uint32_t TRX_SYS_MYSQL_LOG_NAME
MySQL log file name.
Definition: trx0sys.h:307
void trx_sys_update_mysql_binlog_offset(trx_t *trx, mtr_t *mtr)
Updates the offset information about the end of the MySQL binlog entry which corresponds to the trans...
Definition: trx0sys.cc:363
constexpr size_t TRX_SYS_OLD_N_RSEGS
Definition: trx0sys.h:281
constexpr uint32_t TRX_SYS_MYSQL_LOG_OFFSET_HIGH
high 4 bytes of the offset within that file
Definition: trx0sys.h:303
size_t trx_get_shard_no(trx_id_t trx_id)
Computes shard number for a given trx_id.
Definition: trx0sys.h:374
constexpr uint32_t TRX_SYS_DOUBLEWRITE_MAGIC
4-byte magic number which shows if we already have created the doublewrite buffer
Definition: trx0sys.h:323
page_no_t trx_sysf_rseg_find_page_no(ulint rseg_id)
Find the page number in the TRX_SYS page for a given slot/rseg_id.
Definition: trx0sys.cc:384
static bool trx_sys_hdr_page(const page_id_t &page_id)
Checks if a page address is the trx sys header page.
static void trx_sys_mutex_exit()
Release the trx_sys->mutex.
Definition: trx0sys.h:860
static void trx_write_trx_id(byte *ptr, trx_id_t id)
Writes a trx id to an index page.
void trx_sys_get_binlog_prepared(std::vector< trx_id_t > &trx_ids)
Get a list of all binlog prepared transactions.
Definition: trx0sys.cc:211
static void trx_sys_serialisation_mutex_exit()
Release the trx_sys->serialisation_mutex.
Definition: trx0sys.h:879
void trx_sys_after_pre_dd_shutdown_validate()
Validates lists of transactions at the very end of the pre-dd-shutdown phase.
Definition: trx0sys.cc:734
static void trx_sys_serialisation_mutex_enter()
Acquire the trx_sys->serialisation_mutex.
Definition: trx0sys.h:874
constexpr uint32_t TRX_SYS_SYSTEM_RSEG_ID
The automatically created system rollback segment has this id.
Definition: trx0sys.h:260
static trx_t * trx_rw_is_active(trx_id_t trx_id, bool do_ref_count)
Checks if a rw transaction with the given id is active.
static trx_sysf_t * trx_sysf_get(mtr_t *mtr)
Gets a pointer to the transaction system file copy and x-locks its page.
void trx_sys_read_binlog_position(char *file, uint64_t &offset)
Get current binary log positions stored.
Definition: trx0sys.cc:295
constexpr uint32_t TRX_SYS
The offset of the transaction system header on the page.
Definition: trx0sys.h:263
void trx_sys_create(void)
Creates the trx_sys instance and initializes purge_queue and mutex.
Definition: trx0sys.cc:609
trx_id_t trx_sys_oldest_trx_no()
Definition: trx0sys.cc:201
size_t trx_sys_recovered_active_trxs_count()
Reads number of recovered transactions which have state equal to TRX_STATE_ACTIVE (so are not prepare...
Definition: trx0sys.cc:772
void trx_sys_undo_spaces_deinit()
Free the resources occupied by trx_sys_undo_spaces, called once during thread de-initialization.
Definition: trx0sys.cc:822
constexpr uint32_t TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N
Contents of TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED.
Definition: trx0sys.h:344
void trx_sys_before_pre_dd_shutdown_validate()
Validates lists of transactions at the very beginning of the pre-dd-shutdown phase.
Definition: trx0sys.cc:711
constexpr uint32_t TRX_SYS_MYSQL_LOG_MAGIC_N
Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD.
Definition: trx0sys.h:294
constexpr uint32_t TRX_SYS_DOUBLEWRITE_FSEG
fseg header of the fseg containing the doublewrite buffer
Definition: trx0sys.h:320
purge_pq_t * trx_sys_init_at_db_start(void)
Creates and initializes the central memory structures for the transaction system.
Definition: trx0sys.cc:487
ulint trx_sysf_rseg_find_free(mtr_t *mtr)
Look for a free slot for a rollback segment in the trx system file copy.
Definition: trx0sys.cc:401
void trx_sys_close(void)
Shutdown/Close the transaction system.
Definition: trx0sys.cc:655
static space_id_t trx_sysf_rseg_get_space(trx_sysf_t *sys_header, ulint i, mtr_t *mtr)
Gets the space of the nth rollback segment slot in the trx system file copy.
constexpr uint32_t TRX_SYS_DOUBLEWRITE_REPEAT
we repeat TRX_SYS_DOUBLEWRITE_MAGIC, TRX_SYS_DOUBLEWRITE_BLOCK1, TRX_SYS_DOUBLEWRITE_BLOCK2 so that i...
Definition: trx0sys.h:333
trx_id_t trx_sys_allocate_trx_no()
Allocates a new transaction number (for trx->no).
Definition: trx0sys.ic:263
static trx_id_t trx_read_trx_id(const byte *ptr)
Reads a trx id from an index page.
static bool trx_sys_need_rollback()
Determine if there are incomplete transactions in the system.
bool trx_sys_validate_trx_list()
Validate the trx_sys_t::rw_trx_list.
Definition: trx0sys.cc:789
static void trx_sysf_rseg_set_space(trx_sysf_t *sys_header, ulint i, space_id_t space, mtr_t *mtr)
Sets the space id of the nth rollback segment slot in the trx system file copy.
static void trx_sys_rw_trx_add(trx_t *trx)
Add the transaction to the RW transaction set.
constexpr uint32_t TRX_SYS_MYSQL_LOG_NAME_LEN
Maximum length of MySQL binlog file name, in bytes.
Definition: trx0sys.h:292
bool trx_sys_write_binlog_position(const char *last_file, uint64_t last_offset, const char *file, uint64_t offset)
Update binary log position if not already updated.
Definition: trx0sys.cc:347
void trx_sys_create_sys_pages(void)
Creates and initializes the transaction system at the database creation.
Definition: trx0sys.cc:643
constexpr uint32_t TRX_SYS_DOUBLEWRITE_BLOCK1
page number of the first page in the first sequence of 64 (= FSP_EXTENT_SIZE) consecutive pages in th...
Definition: trx0sys.h:326
void trx_sys_undo_spaces_init()
Initialize trx_sys_undo_spaces, called once during srv_start().
Definition: trx0sys.cc:813
static bool trx_sys_mutex_own()
Test if trx_sys->mutex is owned.
Definition: trx0sys.h:865
constexpr uint32_t TRX_SYS_TRX_ID_STORE
Transaction system header.
Definition: trx0sys.h:271
void trx_sys_after_background_threads_shutdown_validate()
Validates lists of transactions after all background threads of InnoDB exited during shutdown of MySQ...
Definition: trx0sys.cc:766
constexpr uint32_t TRX_SYS_MYSQL_LOG_OFFSET_LOW
low 4 bytes of the offset within that file
Definition: trx0sys.h:305
trx_id_t trx_sys_allocate_trx_id()
Allocates a new transaction id (for trx->id).
Definition: trx0sys.ic:258
void trx_sys_persist_gtid_num(trx_id_t gtid_trx_no)
Persist transaction number limit below which all transaction GTIDs are persisted to disk table.
Definition: trx0sys.cc:190
constexpr uint32_t TRX_SYS_DOUBLEWRITE_MAGIC_N
Contents of TRX_SYS_DOUBLEWRITE_MAGIC.
Definition: trx0sys.h:342
static void trx_sys_mutex_enter()
Acquire the trx_sys->mutex.
Definition: trx0sys.h:857
static page_no_t trx_sysf_rseg_get_page_no(trx_sysf_t *sys_header, ulint i, mtr_t *mtr)
Gets the page number of the nth rollback segment slot in the trx system file copy.
uint trx_rseg_n_slots_debug
Definition: trx0sys.cc:147
constexpr uint32_t TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
magic number which is TRX_SYS_MYSQL_LOG_MAGIC_N if we have valid data in the MySQL binlog info
Definition: trx0sys.h:301
trx_sys_t * trx_sys
The transaction system.
Definition: trx0sys.cc:60
constexpr uint32_t TRX_SYS_DOUBLEWRITE_BLOCK2
page number of the first page in the second sequence of 64 consecutive pages in the doublewrite buffe...
Definition: trx0sys.h:329
constexpr uint32_t TRX_SYS_FSEG_HEADER
segment header for the tablespace segment the trx system is created into
Definition: trx0sys.h:273
constexpr uint32_t TRX_SYS_RSEGS
the start of the array of rollback segment specification slots
Definition: trx0sys.h:275
static bool trx_sys_serialisation_mutex_own()
Test if trx_sys->serialisation_mutex is owned.
Definition: trx0sys.h:868
trx_id_t trx_sys_get_next_trx_id_or_no()
Retrieves the next value that will be allocated if trx_sys_allocate_trx_id() or trx_sys_allocate_trx_id...
Definition: trx0sys.ic:274
constexpr uint32_t TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED
If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, we must reset the doublewrite buffer...
Definition: trx0sys.h:338
Transaction system.
The transaction.
static bool trx_state_eq(const trx_t *trx, trx_state_t state)
Determines if a transaction is in the given state.
Transaction system global type definitions.
@ TRX_STATE_COMMITTED_IN_MEMORY
Definition: trx0types.h:93
constexpr trx_id_t TRX_ID_MAX
Maximum transaction identifier.
Definition: trx0types.h:145
std::priority_queue< TrxUndoRsegs, std::vector< TrxUndoRsegs, ut::allocator< TrxUndoRsegs > >, TrxUndoRsegs > purge_pq_t
Definition: trx0types.h:631
std::vector< trx_id_t, ut::allocator< trx_id_t > > trx_ids_t
Definition: trx0types.h:633
ib_id_t trx_id_t
Transaction identifier (DB_TRX_ID, DATA_TRX_ID)
Definition: trx0types.h:138
byte trx_sysf_t
File objects.
Definition: trx0types.h:155
ib_mutex_t TrxSysMutex
Definition: trx0types.h:174
Version control for database, common definitions, and include files.
constexpr uint32_t UNIV_PAGE_SIZE_MIN
Minimum page size InnoDB currently supports.
Definition: univ.i:321
unsigned long int ulint
Definition: univ.i:406
Utilities for byte operations.
Utilities related to class lifecycle.
#define ut_ad(EXPR)
Debug assertion.
Definition: ut0dbg.h:105
#define ut_a(EXPR)
Abort execution if EXPR does not evaluate to nonzero.
Definition: ut0dbg.h:93
The ut::Guarded template which protects access to another class with mutex.
List utilities.
Policy based mutexes.
#define mutex_enter(M)
Definition: ut0mutex.h:117