MySQL 9.2.0
Source Code Documentation
rpl_commit_stage_manager.h
Go to the documentation of this file.
1/* Copyright (c) 2019, 2024, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24#ifndef RPL_COMMIT_STAGE_MANAGER
25#define RPL_COMMIT_STAGE_MANAGER
26
27#include <atomic>
28#include <utility>
29
30#include "my_dbug.h"
33#include "sql/sql_class.h"
34#include "thr_mutex.h"
35
36class THD;
37
38/**
39 Class for maintaining the commit stages for binary log group commit.
40 */
42 public:
45
46 public:
48
50
51 bool is_empty() const { return m_first == nullptr; }
52
53 /**
54 Append a linked list of threads to the queue.
55
56 @param[in] first Linked list of threads to be appended to queue
57
58 @retval true The queue was empty before this operation.
59 @retval false The queue was non-empty before this operation.
60 */
61 bool append(THD *first);
62
63 /**
64 Fetch the entire queue for a stage. It is a wrapper over
65 fetch_and_empty() and acquires queue lock before fetching
66 and emptying the queue threads.
67
68 @return Pointer to the first session of the queue.
69 */
71
72 /**
73 Fetch the entire queue for a stage. It is a wrapper over
74 fetch_and_empty(). The caller must acquire queue lock before
75 calling this function.
76
77 @return Pointer to the first session of the queue.
78 */
80
81 /**
82 Remove first member from the queue
83
84 @retval Returns std::pair<bool, THD *> object.
85 The first boolean value of pair if true determines queue
86 is not empty, and false determines queue is empty.
87 The second value returns the first removed member.
88 */
89 std::pair<bool, THD *> pop_front();
90
91 /**
92 Get number of elements in the queue.
93
94 @retval Returns number of elements in the queue.
95 */
96 inline int32 get_size() { return m_size.load(); }
97
98 /**
99 Fetch the first thread of the queue.
100
101 @return first thread of the queue.
102 */
103 THD *get_leader() { return m_first; }
104
105 void lock() {
108 }
109
111
113
114 private:
115 /**
116 Fetch the entire queue for a stage.
117
118 @retval This will fetch the entire queue in one go.
119 */
121
122 /**
123 Pointer to the first thread in the queue, or nullptr if the queue is
124 empty.
125 */
127
128 /**
129 Pointer to the location holding the end of the queue.
130
131 This is either @c &m_first, or a pointer to the @c next_to_commit of
132 the last thread that is enqueued.
133 */
135
136 /** size of the queue */
137 std::atomic<int32> m_size;
138
139 /** Lock for protecting the queue. */
141
142 /*
143 This attribute did not have the desired effect, at least not according
144 to -fsanitize=undefined with gcc 5.2.1
145 */
146 }; // MY_ATTRIBUTE((aligned(CPU_LEVEL1_DCACHE_LINESIZE)));
147
148 private:
150
152
154
155 public:
156 /**
157 Fetch Commit_stage_manager class instance.
158
159 @return Reference to the Commit_stage_manager class instance.
160 */
162
163 /**
164 Constants for queues for different stages.
165 */
166 enum StageID {
173 };
174
175 /**
176 Initializes m_stage_cond_binlog, m_stage_cond_commit_order,
177 m_stage_cond_leader condition variables and m_lock_done mutex.
178
179 The binlog follower threads block on the m_stage_cond_binlog condition
180 variable until signalled to wake up by the leader thread. Similarly, the
181 commit order follower threads block on the m_stage_cond_commit_order
182 condition variable until signalled to wake up by the leader thread.
183
184 If the first binlog thread, which is supposed to be the leader, finds that
185 the commit order queue is not empty, it blocks on m_stage_cond_leader until
186 the commit order leader signals it to wake up and become the new leader.
187
188 m_lock_done mutex is shared by all three stages.
189
190 @param key_LOCK_flush_queue mutex instrumentation key
191 @param key_LOCK_sync_queue mutex instrumentation key
192 @param key_LOCK_commit_queue mutex instrumentation key
193 @param key_LOCK_after_commit_queue mutex instrumentation key
194 @param key_LOCK_done mutex instrumentation key
195 @param key_LOCK_wait_for_group_turn mutex instrumentation key
196 @param key_COND_done cond instrumentation key
197 @param key_COND_flush_queue cond instrumentation key
198 @param key_COND_wait_for_group_turn cond instrumentation key
199 */
200 void init(PSI_mutex_key key_LOCK_flush_queue,
201 PSI_mutex_key key_LOCK_sync_queue,
202 PSI_mutex_key key_LOCK_commit_queue,
203 PSI_mutex_key key_LOCK_after_commit_queue,
204 PSI_mutex_key key_LOCK_done,
205 PSI_mutex_key key_LOCK_wait_for_group_turn,
206 PSI_cond_key key_COND_done, PSI_cond_key key_COND_flush_queue,
207 PSI_cond_key key_COND_wait_for_group_turn);
208
209 /**
210 Deinitializes m_stage_cond_binlog, m_stage_cond_commit_order,
211 m_stage_cond_leader condition variables and m_lock_done mutex.
212 */
213 void deinit();
214
215 /**
216 Checks if the THD session parameter BGC ticket is active and
217 the BGC back ticket was incremented.
218
219 @param thd The THD session that holds the ticket to check.
220
221 @return True if the THD session parameter BGC ticket is active and
222 the BGC back ticket was incremented, false otherwise.
223 */
225
226 /**
227 Waits for the THD session parameter underlying BGC ticket to become
228 active.
229
230 @param thd The THD session that holds the ticket to wait for.
231 @param update_ticket_manager Indicates whether to mark ticket
232 as consumed by the session (add session to processed sessions)
233 after the ticket is opened for processing.
234 */
235 void wait_for_ticket_turn(THD *thd, bool update_ticket_manager = true);
236
237 /**
238 Appends the given THD session object to the given stage queue. It
239 verifies that the given session's ticket is the active ticket, if not,
240 waits on `m_cond_wait_for_ticket_turn` condition variable until it is.
241
242 @param stage The stage to add the THD parameter to.
243 @param thd The THD session object to queue.
244
245 @return True if the session is a group leader, false otherwise.
246 */
247 bool append_to(StageID stage, THD *thd);
248
249 /**
250 Enroll a set of sessions for a stage.
251
252 This will queue the session thread for writing and flushing.
253
254 If the thread being queued is assigned as stage leader, it will
255 return immediately.
256
257 If wait_if_follower is true and the thread is not the stage leader,
258 the thread will wait for the queue to be processed by the
259 leader before it returns.
260 In DBUG-ON version the follower marks its preempt status as ready.
261
262 The session threads entering this function acquire mutexes, and a few of
263 them are not released on exit, depending on thread and stage type.
264 - A binlog leader (returning true when stage!=COMMIT_ORDER_FLUSH_STAGE) will
265 acquire the stage mutex in this function and not release it.
266 - A commit order leader of the flush stage (returning true when
267 stage==COMMIT_ORDER_FLUSH_STAGE) will acquire both the stage mutex and the
268 flush queue mutex in this function, and not release them.
269 - A follower (returning false) will release any mutexes it takes, before
270 returning from the function.
271
272 @param[in] stage Stage identifier for the queue to append to.
273 @param[in] first Queue to append.
274 @param[in] stage_mutex
275 Pointer to the currently held stage mutex, or nullptr if we're
276 not in a stage, that will be released when changing stage.
277 @param[in] enter_mutex
278 Pointer to the mutex that will be taken when changing stage.
279
280 @retval true Thread is stage leader.
281 @retval false Thread was not stage leader and processing has been done.
282 */
283 bool enroll_for(StageID stage, THD *first, mysql_mutex_t *stage_mutex,
284 mysql_mutex_t *enter_mutex);
285
286 /**
287 Remove first member from the queue for given stage
288
289 @retval Returns std::pair<bool, THD *> object.
290 The first boolean value of pair if true determines queue
291 is not empty, and false determines queue is empty.
292 The second value returns the first removed member.
293 */
294 std::pair<bool, THD *> pop_front(StageID stage) {
295 return m_queue[stage].pop_front();
296 }
297
298#ifndef NDEBUG
299 /**
300 The method ensures the follower's execution path can be preempted
301 by the leader's thread.
302 Preempt status of @c head follower is checked to engage the leader
303 into waiting when set.
304
305 @param head THD* of a follower thread
306 */
307 void clear_preempt_status(THD *head);
308#endif
309
310 /**
311 Fetch the entire queue and empty it. It acquires queue lock before fetching
312 and emptying the queue threads.
313
314 @param[in] stage Stage identifier for the queue to fetch.
315
316 @return Pointer to the first session of the queue.
317 */
319
320 /**
321 Fetch the entire queue and empty it. The caller must acquire queue lock
322 before calling this function.
323
324 @param[in] stage Stage identifier for the queue to fetch.
325
326 @return Pointer to the first session of the queue.
327 */
329
330 /**
331 Introduces a wait operation on the executing thread. The
332 waiting is done until the timeout elapses or count is
333 reached (whichever comes first).
334
335 If count == 0, then the session will wait until the timeout
336 elapses. If timeout == 0, then there is no waiting.
337
338 @param usec the number of microseconds to wait.
339 @param count wait for as many as count to join the queue the
340 session is waiting on
341 @param stage which stage queue size to compare count against.
342 */
343 void wait_count_or_timeout(ulong count, long usec, StageID stage);
344
345 /**
346 The function is called after follower threads are processed by the leader,
347 to unblock follower threads.
348
349 @param queue the thread list which needs to be unblocked
350 @param stage Stage identifier current thread belong to.
351 */
353
354 /**
355 Signals threads waiting on their BGC ticket turn.
356
357 @param force Whether or not to force the signaling, despite the state of
358 the ticket manager.
359 */
360 void signal_end_of_ticket(bool force = false);
361 /**
362 Updates the THD session object underlying BGC context.
363
364 @param thd The THD object to update the BGC context for.
365 */
367 /**
368 Adds the given session count to the total of processed sessions in the
369 ticket manager active window, ends the active window if possible and
370 notifies other threads that are waiting for a given ticket to have an
371 active processing window.
372
373 @param sessions_count The number of sessions to add to the ticket
374 manager processed sessions count.
375 @param session_ticket The session ticket (used for validations).
376 */
377 void update_ticket_manager(std::uint64_t sessions_count,
378 const binlog::BgcTicket &session_ticket);
379 /**
380 Waits for the session's ticket, if needed, and resets the session's
381 ticket context.
382
383 @param thd The THD session object to finish the ticket's related work.
384 */
385 void finish_session_ticket(THD *thd);
386
387 /**
388 This function gets called after transactions are flushed to the engine
389 i.e. after calling ha_flush_logs, to unblock commit order thread list
390 which are not needed to wait for other stages.
391
392 @param first the thread list which needs to be unblocked
393 */
395
396 /**
397 Wrapper on Mutex_queue lock(), acquires lock on stage queue.
398
399 @param[in] stage Stage identifier for the queue to lock.
400 */
401 void lock_queue(StageID stage) { m_queue[stage].lock(); }
402
403 /**
404 Wrapper on Mutex_queue unlock(), releases lock on stage queue.
405
406 @param[in] stage Stage identifier for the queue to unlock.
407 */
408 void unlock_queue(StageID stage) { m_queue[stage].unlock(); }
409
410 /**
411 Disables the ability for session BGC tickets to be set manually.
412 */
413 static void disable_manual_session_tickets();
414 /**
415 Enables the ability for session BGC tickets to be set manually.
416 */
417 static void enable_manual_session_tickets();
418
419 private:
420 /** check if Commit_stage_manager variables already initialized. */
422
423 /**
424 Queues for sessions.
425
426 We need five queues:
427 - Binlog flush queue: transactions that are going to be flushed to the
428 engine and written to the binary log.
429 - Commit order flush queue: transactions that are not going to write the
430 binlog at all, but participate in the beginning
431 of the group commit, up to and including the
432 engine flush.
433 - Sync queue: transactions that are going to be synced to disk
434 - Commit queue: transactions that are going to be committed
435 (when binlog_order_commit=1).
436 - After commit queue: transactions for which after commit hook is to be
437 executed.
438 */
440
441 /**
442 The binlog leader waits on this condition variable till it is indicated
443 to wake up. If the binlog flush queue gets its first thread in the queue but
444 by then the commit order flush queue has already elected a leader, the
445 first thread of the binlog queue waits on this condition variable and gets
446 signalled to wake up by the commit order flush queue leader later.
447 */
449
450 /**
451 Condition variable to indicate that the binlog threads can wake up
452 and continue.
453 */
455
456 /**
457 Condition variable to indicate that the flush to storage engine
458 is done and commit order threads can again wake up and continue.
459 */
461
462 /** Mutex used for the condition variable above */
464
465 /** Mutex used for the stage level locks */
467
468#ifndef NDEBUG
469 /** Save pointer to leader thread which is used later to awake leader */
471
472 /** Flag is set by Leader when it starts waiting for follower's all-clear */
474
475 /** Condition variable to indicate a follower started waiting for commit */
477#endif
478
479 /** Condition variable to wait for a given ticket to become active. */
481 /** Mutex to protect the wait for a given ticket to become active. */
483};
484
485#endif /*RPL_COMMIT_STAGE_MANAGER*/
Kerberos Client Authentication nullptr
Definition: auth_kerberos_client_plugin.cc:251
Definition: rpl_commit_stage_manager.h:43
THD * fetch_and_empty_acquire_lock()
Fetch the entire queue for a stage.
Definition: rpl_commit_stage_manager.cc:413
THD * fetch_and_empty_skip_acquire_lock()
Fetch the entire queue for a stage.
Definition: rpl_commit_stage_manager.cc:420
std::atomic< int32 > m_size
size of the queue
Definition: rpl_commit_stage_manager.h:137
mysql_mutex_t * m_lock
Lock for protecting the queue.
Definition: rpl_commit_stage_manager.h:140
bool is_empty() const
Definition: rpl_commit_stage_manager.h:51
bool append(THD *first)
Append a linked list of threads to the queue.
Definition: rpl_commit_stage_manager.cc:42
void assert_owner()
Definition: rpl_commit_stage_manager.h:112
THD * get_leader()
Fetch the first thread of the queue.
Definition: rpl_commit_stage_manager.h:103
void init(mysql_mutex_t *lock)
Definition: rpl_commit_stage_manager.h:49
int32 get_size()
Get number of elements in the queue.
Definition: rpl_commit_stage_manager.h:96
void unlock()
Definition: rpl_commit_stage_manager.h:110
void lock()
Definition: rpl_commit_stage_manager.h:105
THD ** m_last
Pointer to the location holding the end of the queue.
Definition: rpl_commit_stage_manager.h:134
THD * fetch_and_empty()
Fetch the entire queue for a stage.
Definition: rpl_commit_stage_manager.cc:425
Mutex_queue()
Definition: rpl_commit_stage_manager.h:47
std::pair< bool, THD * > pop_front()
Remove first member from the queue.
Definition: rpl_commit_stage_manager.cc:76
THD * m_first
Pointer to the first thread in the queue, or nullptr if the queue is empty.
Definition: rpl_commit_stage_manager.h:126
Class for maintaining the commit stages for binary log group commit.
Definition: rpl_commit_stage_manager.h:41
const Commit_stage_manager & operator=(const Commit_stage_manager &)=delete
void update_session_ticket_state(THD *thd)
Updates the THD session object underlying BGC context.
Definition: rpl_commit_stage_manager.cc:516
void init(PSI_mutex_key key_LOCK_flush_queue, PSI_mutex_key key_LOCK_sync_queue, PSI_mutex_key key_LOCK_commit_queue, PSI_mutex_key key_LOCK_after_commit_queue, PSI_mutex_key key_LOCK_done, PSI_mutex_key key_LOCK_wait_for_group_turn, PSI_cond_key key_COND_done, PSI_cond_key key_COND_flush_queue, PSI_cond_key key_COND_wait_for_group_turn)
Initializes m_stage_cond_binlog, m_stage_cond_commit_order, m_stage_cond_leader condition variables a...
Definition: rpl_commit_stage_manager.cc:101
mysql_mutex_t m_lock_done
Mutex used for the condition variable above.
Definition: rpl_commit_stage_manager.h:463
void wait_for_ticket_turn(THD *thd, bool update_ticket_manager=true)
Waits for the THD session parameter underlying BGC ticket to become active.
Definition: rpl_commit_stage_manager.cc:186
std::pair< bool, THD * > pop_front(StageID stage)
Remove first member from the queue for given stage.
Definition: rpl_commit_stage_manager.h:294
mysql_cond_t m_stage_cond_leader
The binlog leader waits on this condition variable till it is indicated to wake up.
Definition: rpl_commit_stage_manager.h:448
void deinit()
Deinitializes m_stage_cond_binlog, m_stage_cond_commit_order, m_stage_cond_leader condition variables...
Definition: rpl_commit_stage_manager.cc:150
void finish_session_ticket(THD *thd)
Waits for the session's ticket, if needed, and resets the session's ticket context.
Definition: rpl_commit_stage_manager.cc:541
bool leader_await_preempt_status
Flag is set by Leader when it starts waiting for follower's all-clear.
Definition: rpl_commit_stage_manager.h:473
mysql_cond_t m_stage_cond_binlog
Condition variable to indicate that the binlog threads can wake up and continue.
Definition: rpl_commit_stage_manager.h:454
void clear_preempt_status(THD *head)
The method ensures the follower's execution path can be preempted by the leader's thread.
Definition: rpl_commit_stage_manager.cc:561
Mutex_queue m_queue[STAGE_COUNTER]
Queues for sessions.
Definition: rpl_commit_stage_manager.h:439
Commit_stage_manager(const Commit_stage_manager &)=delete
void signal_end_of_ticket(bool force=false)
Signals threads waiting on their BGC ticket turn.
Definition: rpl_commit_stage_manager.cc:501
mysql_cond_t m_stage_cond_commit_order
Condition variable to indicate that the flush to storage engine is done and commit order threads can ...
Definition: rpl_commit_stage_manager.h:460
mysql_mutex_t m_lock_wait_for_ticket_turn
Mutex to protect the wait for a given ticket to become active.
Definition: rpl_commit_stage_manager.h:482
mysql_cond_t m_cond_wait_for_ticket_turn
Condition variable to wait for a given ticket to become active.
Definition: rpl_commit_stage_manager.h:480
bool append_to(StageID stage, THD *thd)
Appends the given THD session object to the given stage queue.
Definition: rpl_commit_stage_manager.cc:229
void update_ticket_manager(std::uint64_t sessions_count, const binlog::BgcTicket &session_ticket)
Adds the given session count to the total of processed sessions in the ticket manager active window,...
Definition: rpl_commit_stage_manager.cc:525
mysql_mutex_t m_queue_lock[STAGE_COUNTER - 1]
Mutex used for the stage level locks.
Definition: rpl_commit_stage_manager.h:466
StageID
Constants for queues for different stages.
Definition: rpl_commit_stage_manager.h:166
@ COMMIT_ORDER_FLUSH_STAGE
Definition: rpl_commit_stage_manager.h:171
@ COMMIT_STAGE
Definition: rpl_commit_stage_manager.h:169
@ BINLOG_FLUSH_STAGE
Definition: rpl_commit_stage_manager.h:167
@ SYNC_STAGE
Definition: rpl_commit_stage_manager.h:168
@ AFTER_COMMIT_STAGE
Definition: rpl_commit_stage_manager.h:170
@ STAGE_COUNTER
Definition: rpl_commit_stage_manager.h:172
static Commit_stage_manager & get_instance()
Fetch Commit_stage_manager class instance.
Definition: rpl_commit_stage_manager.cc:574
static void enable_manual_session_tickets()
Enables the ability for session BGC tickets to be set manually.
Definition: rpl_commit_stage_manager.cc:556
THD * fetch_queue_acquire_lock(StageID stage)
Fetch the entire queue and empty it.
Definition: rpl_commit_stage_manager.cc:466
bool m_is_initialized
check if Commit_stage_manager variables already initialized.
Definition: rpl_commit_stage_manager.h:421
Commit_stage_manager()
Definition: rpl_commit_stage_manager.h:149
void wait_count_or_timeout(ulong count, long usec, StageID stage)
Introduces a wait operation on the executing thread.
Definition: rpl_commit_stage_manager.cc:443
static void disable_manual_session_tickets()
Disables the ability for session BGC tickets to be set manually.
Definition: rpl_commit_stage_manager.cc:549
bool is_ticket_on_its_turn_and_back_ticket_incremented(THD *thd) const
Checks if the THD session parameter BGC ticket is active and the BGC back ticket was incremented.
Definition: rpl_commit_stage_manager.cc:167
void lock_queue(StageID stage)
Wrapper on Mutex_queue lock(), acquires lock on stage queue.
Definition: rpl_commit_stage_manager.h:401
THD * leader_thd
Save pointer to leader thread which is used later to awake leader.
Definition: rpl_commit_stage_manager.h:470
void process_final_stage_for_ordered_commit_group(THD *first)
This function gets called after transactions are flushed to the engine i.e.
Definition: rpl_commit_stage_manager.cc:476
THD * fetch_queue_skip_acquire_lock(StageID stage)
Fetch the entire queue and empty it.
Definition: rpl_commit_stage_manager.cc:471
mysql_cond_t m_cond_preempt
Condition variable to indicate a follower started waiting for commit.
Definition: rpl_commit_stage_manager.h:476
void signal_done(THD *queue, StageID stage=BINLOG_FLUSH_STAGE)
The function is called after follower threads are processed by the leader, to unblock follower threads.
Definition: rpl_commit_stage_manager.cc:484
void unlock_queue(StageID stage)
Wrapper on Mutex_queue unlock(), releases lock on stage queue.
Definition: rpl_commit_stage_manager.h:408
bool enroll_for(StageID stage, THD *first, mysql_mutex_t *stage_mutex, mysql_mutex_t *enter_mutex)
Enroll a set of sessions for a stage.
Definition: rpl_commit_stage_manager.cc:238
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Represents the Binlog Group Commit Ticket - BGC Ticket.
Definition: bgc_ticket.h:54
#define mysql_mutex_lock(M)
Definition: mysql_mutex.h:50
#define mysql_mutex_unlock(M)
Definition: mysql_mutex.h:57
unsigned int PSI_cond_key
Instrumented cond key.
Definition: psi_cond_bits.h:44
unsigned int PSI_mutex_key
Instrumented mutex key.
Definition: psi_mutex_bits.h:52
#define mysql_mutex_assert_not_owner(M)
Wrapper, to use safe_mutex_assert_not_owner with instrumented mutexes.
Definition: mysql_mutex.h:126
#define mysql_mutex_assert_owner(M)
Wrapper, to use safe_mutex_assert_owner with instrumented mutexes.
Definition: mysql_mutex.h:112
int32_t int32
Definition: my_inttypes.h:66
static int count
Definition: myisam_ftdump.cc:45
static QUEUE queue
Definition: myisampack.cc:210
Provides atomic access in shared-exclusive modes.
Definition: shared_spin_lock.h:79
Instrumentation helpers for conditions.
Instrumentation helpers for mutexes.
An instrumented cond structure.
Definition: mysql_cond_bits.h:50
An instrumented mutex structure.
Definition: mysql_mutex_bits.h:50
MySQL mutex implementation.