MySQL 9.7.0
Source Code Documentation
resource_manager_stats_collector.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2024, 2026, Oracle and/or its affiliates.
3*/
4
5#ifndef RESOURCE_MANAGER_STATS_COLLECTOR_H
6#define RESOURCE_MANAGER_STATS_COLLECTOR_H
7
8#include <atomic>
9#include <memory>
10#include <thread>
11
12#include <my_inttypes.h>
19
21
22namespace gr_resource_manager {
23/**
24 @class Lag_metadata_thresholds
25
26 This class stores the lag thresholds controlled by system variables.
27*/
29 public:
30 /// Applier channel threshold.
31 std::atomic<uint> applier_lag_limit_in_seconds{3600};
32 /// Recovery channel threshold.
33 std::atomic<uint> recovery_lag_limit_in_seconds{3600};
34 /// Used memory threshold.
35 std::atomic<uint> used_memory_limit{100};
36 /// Quarantine time.
37 std::atomic<uint> quarantine_time{3600};
38};
39
41 private:
43
44 public:
48 delete;
50 const Resource_manager_stats_collector &) = delete;
52 delete;
55
56 public:
57 // Functions for channel and memory
58 /**
59 Return applier lag.
60
61 @retval applier lag in seconds
62 */
63 uint get_applier_lag();
64 /**
65 Return recovery lag.
66
67 @retval recovery lag in seconds
68 */
69 uint get_recovery_lag();
70 /**
71 Return used memory.
72
73 @retval percentage of used memory
74 */
76
77 /**
78 Returns number of times applier lag was seen.
79
80 @retval counter of applier lag seen
81 */
84 }
85 /**
86 Returns number of times recovery lag was seen.
87
88 @retval counter of recovery lag seen
89 */
92 }
93 /**
94 Returns number of times low memory was observed.
95
96 @retval counter of low memory seen
97 */
99
100 /**
101 Check if the time difference between `timepoint` and `now` is
102 greater than `diff`.
103 Parameters must be of the same unit: seconds, microseconds...
104
105 @param[in] timepoint timepoint in time
106 @param[in] now a later timepoint in time
107 @param[in] diff the difference to compare
108
109 @returns true if the time difference between `timepoint` and `now` is
110 greater than `diff`,
111 false otherwise
112 */
113 static bool is_time_diff_greater_than(uint64_t timepoint, uint64_t now,
114 uint64_t diff = 300);
115
116 /**
117 Returns the current time represented by seconds
118 elapsed since the Epoch.
119
120 @returns the current time represented by seconds
121 elapsed since the Epoch.
122 */
123 static uint64_t get_time_now_seconds();
124
125 /**
126 Returns the current time represented by micro-seconds
127 elapsed since the Epoch.
128
129 @returns the current time represented by micro-seconds
130 elapsed since the Epoch.
131 */
132 static uint64_t get_time_now_microseconds();
133
134 /**
135 Converts a time point represented by micro-seconds
136 elapsed since the Epoch to the string format
137 'YYYY-MM-DD hh:mm:ss.ffffff'.
138
139 @param[in] microseconds_since_epoch time point represented
140 by micro-seconds elapsed since the Epoch
141
142 @returns a string with the format 'YYYY-MM-DD hh:mm:ss.ffffff'
143 */
145 uint64_t microseconds_since_epoch);
146
148
149 /// Checks if group_replication is running.
150 bool is_group_replication_running() const;
151
152 /// Checks if group is in single primary mode and this member is a secondary
154
155 /// Timestamp of last eviction caused by applier lag
156 std::string get_applier_eviction_timestamp();
157
158 /// Timestamp of last eviction caused by recovery lag
160
161 /// Timestamp of last eviction caused by low memory
162 std::string get_memory_eviction_timestamp();
163
164 /// Timestamp of last channel lag query failure
166
167 /// Timestamp of last memory status failure
169
170 private:
171 /// Wrappers to call API and do error handling.
172 int fetch_memory_used();
173 int fetch_channel_lag();
174
175 private:
176 /// frequency of sample collection, in seconds
177 static constexpr uint s_sample_rate{5};
178 /// Continuous lag tolerance limit.
179 static constexpr uint s_max_continuous_lag_counter{10};
180
181 /// Timestamp of last channel lag query failure.
183 /// Timestamp of last memory information failure.
184 std::atomic<uint64_t> m_memory_monitoring_error_timestamp{0};
185
186 /// Number of times applier channel lag was hit continuously.
188 /// Number of times recovery channel lag was hit continuously.
190 /// Number of times memory consumption crossed the limit continuously.
192
193 private:
194 /// Below information is needed by status variables.
195
196 /// Applier channel lag fetched from SQL Query.
197 std::atomic<uint> m_applier_lag_last_fetched{0};
198 /// Recovery channel lag fetched from SQL Query.
199 std::atomic<uint> m_recovery_lag_last_fetched{0};
200 /// Used memory in percentage fetched from system.
202
203 /// Number of times applier channel lag exceeded the limit.
204 std::atomic<uint> m_applier_hit_number_of_times{0};
205 /// Number of times recovery channel lag exceeded the limit.
206 std::atomic<uint> m_recovery_hit_number_of_times{0};
207 /// Number of times memory consumption exceeded the limit.
208 std::atomic<uint> m_memory_hit_number_of_times{0};
209
210 /// Last timestamp member left the group due to applier lag.
211 std::atomic<uint64_t> m_applier_eviction_timestamp{0};
212 /// Last timestamp member left the group due to recovery lag.
213 std::atomic<uint64_t> m_recovery_eviction_timestamp{0};
214 /// Last timestamp member left the group due to excessive memory consumption.
215 std::atomic<uint64_t> m_memory_eviction_timestamp{0};
216
217 private:
218 /// Time difference in seconds between last time this member joined and now.
220
221 /// Below variables are needed to reduce the frequency of logging.
228
229 public:
230 /// Thread functions
231 int start_thread();
232 void stop_thread();
233 void run_process();
234
235 private:
236 static int join(my_thread_handle *thread, void **value_ptr);
237 static bool joinable(const my_thread_handle &thread);
238 void process();
239
240 private:
241 /// Needed by THD
242 const std::string m_name;
243 const std::string m_instr_name;
244 const std::string m_os_name;
245 const std::string m_description;
246
249
250 /// Thread structure.
255 m_instr_name.c_str(),
256 m_os_name.c_str(),
258 PSI_VOLATILITY_PROVISIONING, // instruments are created/destroyed
259 m_description.c_str()};
260
261 /// Synchronize start, stop and update
267
273};
274
275} // namespace gr_resource_manager
276
277#endif /* RESOURCE_MANAGER_STATS_COLLECTOR_H */
This class stores the lag thresholds controlled by system variables.
Definition: resource_manager_stats_collector.h:28
std::atomic< uint > applier_lag_limit_in_seconds
Applier channel threshold.
Definition: resource_manager_stats_collector.h:31
std::atomic< uint > used_memory_limit
Used memory threshold.
Definition: resource_manager_stats_collector.h:35
std::atomic< uint > recovery_lag_limit_in_seconds
Recovery channel threshold.
Definition: resource_manager_stats_collector.h:33
std::atomic< uint > quarantine_time
Quarantine time.
Definition: resource_manager_stats_collector.h:37
Definition: resource_manager_stats_collector.h:40
unsigned int m_seconds_since_member_join
Time difference in seconds between last time this member joined and now.
Definition: resource_manager_stats_collector.h:219
PSI_thread_info m_thread_info
Definition: resource_manager_stats_collector.h:253
uint m_applier_continuous_lag
Number of times applier channel lag was hit continuously.
Definition: resource_manager_stats_collector.h:187
void process()
Definition: resource_manager_stats_collector.cc:229
mysql_mutex_t m_command_lock
Synchronize start, stop and update.
Definition: resource_manager_stats_collector.h:262
uint get_recovery_lag()
Return recovery lag.
Definition: resource_manager_stats_collector.cc:166
PSI_thread_key m_thread_key
Definition: resource_manager_stats_collector.h:252
const std::string m_description
Definition: resource_manager_stats_collector.h:245
~Resource_manager_stats_collector()
Definition: resource_manager_stats_collector.cc:51
std::atomic< uint > m_memory_hit_number_of_times
Number of times memory consumption exceeded the limit.
Definition: resource_manager_stats_collector.h:208
uint m_recovery_continuous_lag
Number of times recovery channel lag was hit continuously.
Definition: resource_manager_stats_collector.h:189
static constexpr uint s_sample_rate
frequency of sample collection, in seconds
Definition: resource_manager_stats_collector.h:177
uint get_recovery_hit_number_of_times()
Returns number of times recovery lag was seen.
Definition: resource_manager_stats_collector.h:90
std::atomic< uint > m_recovery_lag_last_fetched
Recovery channel lag fetched from SQL Query.
Definition: resource_manager_stats_collector.h:199
uint64_t m_not_removed_memory_threshold_hit_quarantime_ts_s
Definition: resource_manager_stats_collector.h:224
eject_status leave_group()
Definition: resource_manager_stats_collector.cc:148
void stop_thread()
Definition: resource_manager_stats_collector.cc:481
void run_process()
Definition: resource_manager_stats_collector.cc:429
gr_resource_manager::Memory_Info m_memory_info
Definition: resource_manager_stats_collector.h:42
std::string get_lag_query_last_error_timestamp()
Timestamp of last channel lag query failure.
Definition: resource_manager_stats_collector.cc:116
uint64_t m_not_removed_applier_threshold_hit_n_members_ts_s
Definition: resource_manager_stats_collector.h:225
const std::string m_instr_name
Definition: resource_manager_stats_collector.h:243
PSI_cond_key m_cond_key
Definition: resource_manager_stats_collector.h:269
std::atomic< uint64_t > m_recovery_eviction_timestamp
Last timestamp member left the group due to recovery lag.
Definition: resource_manager_stats_collector.h:213
static std::string convert_microseconds_to_timestamp_string(uint64_t microseconds_since_epoch)
Converts a time point represented by micro-seconds elapsed since the Epoch to the string format 'YYYY...
Definition: resource_manager_stats_collector.cc:78
uint get_applier_lag()
Return applier lag.
Definition: resource_manager_stats_collector.cc:163
mysql_cond_t m_wait
Definition: resource_manager_stats_collector.h:268
static bool joinable(const my_thread_handle &thread)
Definition: resource_manager_stats_collector.cc:420
uint get_memory_hit_number_of_times()
Returns number of times low memory was observed.
Definition: resource_manager_stats_collector.h:98
gr_rm_thread_state
Definition: resource_manager_stats_collector.h:247
@ IDLE
Definition: resource_manager_stats_collector.h:247
@ RUNNING
Definition: resource_manager_stats_collector.h:247
@ STOPPING
Definition: resource_manager_stats_collector.h:247
uint64_t m_not_removed_recovery_threshold_hit_n_members_ts_s
Definition: resource_manager_stats_collector.h:226
uint get_percentage_used_memory()
Return used memory.
Definition: resource_manager_stats_collector.cc:169
std::string get_applier_eviction_timestamp()
Timestamp of last eviction caused by applier lag.
Definition: resource_manager_stats_collector.cc:99
const std::string m_os_name
Definition: resource_manager_stats_collector.h:244
std::atomic< uint > m_recovery_hit_number_of_times
Number of times recovery channel lag exceeded the limit.
Definition: resource_manager_stats_collector.h:206
std::atomic< uint > m_applier_hit_number_of_times
Number of times applier channel lag exceeded the limit.
Definition: resource_manager_stats_collector.h:204
std::atomic< uint64_t > m_channel_lag_monitoring_error_timestamp
Timestamp of last channel lag query failure.
Definition: resource_manager_stats_collector.h:182
uint64_t m_not_removed_applier_threshold_hit_quarantime_ts_s
Below variables are needed to reduce the frequency of logging.
Definition: resource_manager_stats_collector.h:222
static constexpr uint s_max_continuous_lag_counter
Continuous lag tolerance limit.
Definition: resource_manager_stats_collector.h:179
std::atomic< uint64_t > m_memory_monitoring_error_timestamp
Timestamp of last memory information failure.
Definition: resource_manager_stats_collector.h:184
PSI_mutex_key m_command_key
Definition: resource_manager_stats_collector.h:263
int fetch_channel_lag()
Definition: resource_manager_stats_collector.cc:194
uint64_t m_not_removed_recovery_threshold_hit_quarantime_ts_s
Definition: resource_manager_stats_collector.h:223
uint m_memory_continuous_excess_usage
Number of times memory consumption crossed the limit continuously.
Definition: resource_manager_stats_collector.h:191
static int join(my_thread_handle *thread, void **value_ptr)
Definition: resource_manager_stats_collector.cc:403
static uint64_t get_time_now_microseconds()
Returns the current time represented by micro-seconds elapsed since the Epoch.
Definition: resource_manager_stats_collector.cc:70
my_thread_handle m_thread
Thread structure.
Definition: resource_manager_stats_collector.h:251
Resource_manager_stats_collector(const Resource_manager_stats_collector &)=delete
std::atomic< uint64_t > m_memory_eviction_timestamp
Last timestamp member left the group due to excessive memory consumption.
Definition: resource_manager_stats_collector.h:215
std::atomic< uint > m_percentage_used_memory_last_fetched
Used memory in percentage fetched from system.
Definition: resource_manager_stats_collector.h:201
std::atomic< uint > m_applier_lag_last_fetched
Below information is needed by status variables.
Definition: resource_manager_stats_collector.h:197
static bool is_time_diff_greater_than(uint64_t timepoint, uint64_t now, uint64_t diff=300)
Check if the time difference between timepoint and now is greater than diff.
Definition: resource_manager_stats_collector.cc:58
bool is_group_replication_running() const
Checks if group_replication is running.
Definition: resource_manager_stats_collector.cc:127
int fetch_memory_used()
Wrappers to call API and do error handling.
Definition: resource_manager_stats_collector.cc:173
PSI_mutex_info m_command_info
Definition: resource_manager_stats_collector.h:264
Resource_manager_stats_collector(Resource_manager_stats_collector &&)=delete
Resource_manager_stats_collector()
Definition: resource_manager_stats_collector.cc:37
std::string get_memory_last_error_timestamp()
Timestamp of last memory status failure.
Definition: resource_manager_stats_collector.cc:122
uint64_t m_not_removed_memory_threshold_hit_n_members_ts_s
Definition: resource_manager_stats_collector.h:227
const std::string m_name
Needed by THD.
Definition: resource_manager_stats_collector.h:242
int start_thread()
Thread functions.
Definition: resource_manager_stats_collector.cc:469
gr_rm_thread_state m_thread_state
Definition: resource_manager_stats_collector.h:248
uint get_applier_hit_number_of_times()
Returns number of times applier lag was seen.
Definition: resource_manager_stats_collector.h:82
bool is_group_in_single_primary_mode_and_im_a_secondary() const
Checks if group is in single primary mode and this member is a secondary.
Definition: resource_manager_stats_collector.cc:138
PSI_cond_info m_cond_info
Definition: resource_manager_stats_collector.h:270
Resource_manager_stats_collector & operator=(Resource_manager_stats_collector &&)=delete
static uint64_t get_time_now_seconds()
Returns the current time represented by seconds elapsed since the Epoch.
Definition: resource_manager_stats_collector.cc:63
std::string get_recovery_eviction_timestamp()
Timestamp of last eviction caused by recovery lag.
Definition: resource_manager_stats_collector.cc:105
std::atomic< uint64_t > m_applier_eviction_timestamp
Last timestamp member left the group due to applier lag.
Definition: resource_manager_stats_collector.h:211
Resource_manager_stats_collector & operator=(const Resource_manager_stats_collector &)=delete
std::string get_memory_eviction_timestamp()
Timestamp of last eviction caused by low memory.
Definition: resource_manager_stats_collector.cc:110
#define PSI_DOCUMENT_ME
Definition: component_common.h:29
#define PSI_FLAG_SINGLETON
Singleton flag.
Definition: component_common.h:35
static Bigint * diff(Bigint *a, Bigint *b, Stack_alloc *alloc)
Definition: dtoa.cc:1081
unsigned int PSI_cond_key
Instrumented cond key.
Definition: psi_cond_bits.h:44
unsigned int PSI_mutex_key
Instrumented mutex key.
Definition: psi_mutex_bits.h:52
unsigned int PSI_thread_key
Instrumented thread key.
Definition: psi_thread_bits.h:50
#define PSI_VOLATILITY_PROVISIONING
Definition: psi_bits.h:155
#define PSI_FLAG_THREAD_SYSTEM
System thread flag.
Definition: psi_bits.h:124
eject_status
Definition: group_replication_management_service.h:30
Some integer typedefs for easier portability.
Definition: resource_manager.cc:53
Condition information.
Definition: psi_cond_bits.h:88
Mutex information.
Definition: psi_mutex_bits.h:73
Thread instrument information.
Definition: psi_thread_bits.h:117
Definition: resource_manager_memory_stats.h:29
Definition: my_thread_bits.h:58
An instrumented cond structure.
Definition: mysql_cond_bits.h:50
An instrumented mutex structure.
Definition: mysql_mutex_bits.h:50