MySQL 8.3.0
Source Code Documentation
ut0rnd.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 1994, 2023, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is also distributed with certain software (including but not
10limited to OpenSSL) that is licensed under separate terms, as designated in a
11particular file or component or in included license documentation. The authors
12of MySQL hereby grant you an additional permission to link the program and
13your derivative works with the separately licensed software that they have
14included with MySQL.
15
16This program is distributed in the hope that it will be useful, but WITHOUT
17ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19for more details.
20
21You should have received a copy of the GNU General Public License along with
22this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25*****************************************************************************/
26
27/** @file include/ut0rnd.h
28 Random numbers and hashing
29
30 Created 1/20/1994 Heikki Tuuri
31 ***********************************************************************/
32
33#ifndef ut0rnd_h
34#define ut0rnd_h
35
36#include <atomic>
37#include <cstdint>
38
39#include "univ.i"
40#include "ut0byte.h"
41#include "ut0math.h"
42
43#include "ut0class_life_cycle.h"
44#include "ut0crc32.h"
45#include "ut0seq_lock.h"
46
47namespace ut {
48/** The following function generates pseudo-random 64bit integers which
49enumerate the value space generated by a linear congruence. This is very similar
50to what can be achieved with std::linear_congruential_engine<uint64_t, ...>. If
51you need reliably good pseudo-random numbers for some reason, please consider
52using std::mersenne_twister_engine<>.
53@return a pseudo-random number */
54[[nodiscard]] static inline uint64_t random_64();
55
56/** The following function returns fine clock count as random value.
57This is used for cases which need performance more than the true randomness.
58It is not causing any loads or stores, so it is very CPU-cache-friendly.
59Even for very frequent use, it doesn't cause any CPU cache pollution.
60@return a pseudo-random number */
61[[nodiscard]] static inline uint64_t random_64_fast();
62
63/** Generates a pseudo-random integer from a given interval.
64@param[in] low low limit; can generate also this value
65@param[in] high high limit; can generate also this value
66@return the pseudo-random number within the [low, high] two-side inclusive range
67*/
68[[nodiscard]] static inline uint64_t random_from_interval(uint64_t low,
69 uint64_t high);
70
71/** Generates a light-weight pseudo-random integer from a given interval.
72This is used for the cases which need performance more than the true randomness.
73@param[in] low low limit; can generate also this value
74@param[in] high high limit; can generate also this value
75@return the pseudo-random number within the [low, high] two-side inclusive range
76*/
77[[nodiscard]] static inline uint64_t random_from_interval_fast(uint64_t low,
78 uint64_t high);
79
80/** Hashes a 64-bit integer.
81@param[in] value 64-bit integer
82@return hashed value */
83[[nodiscard]] static inline uint64_t hash_uint64(uint64_t value);
84
85/** Hashes a character string ending in the null character.
86 @return hashed value */
87[[nodiscard]] static inline uint64_t hash_string(const char *str);
88
89/** Hashes a pair of 64bit integers.
90@param[in] n1 first 64bit integer
91@param[in] n2 second 64bit integer
92@return hashed value */
93[[nodiscard]] static inline uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2);
94/** Hashes a binary buffer of given length.
95@param[in] buf buffer of bytes
96@param[in] len length
97@param[in] seed seed to be used in calculation. Can be previous value. A
98 default value is just a randomly chosen number.
99@return hashed value */
100[[nodiscard]] static inline uint64_t hash_binary(
101 const byte *buf, size_t len, uint64_t seed = 0xacb1f3526e25dd39);
102
103/** Hashes a binary buffer of given length in the old innobase way. This is
104highly inefficient, don't use outside areas that require backward compatibility
105on data written to disk.
106@param[in] str buffer of bytes
107@param[in] len length
108@return hashed value */
109[[nodiscard]] static inline uint32_t hash_binary_ib(const byte *str,
110 size_t len);
111
112namespace detail {
113/** Seed value of ut::random_64() */
114extern thread_local uint64_t random_seed;
115
116/** A helper method, it is used by hash_binary_ib for backward compatibility.
117NOTE: Do not use this method, it produces results that are not hashed well.
118Especially for sequences of pairs of <i+n, j+n> over n. */
119constexpr uint32_t hash_uint32_pair_ib(uint32_t n1, uint32_t n2) {
120 constexpr uint32_t HASH_RANDOM_MASK = 1463735687;
121 constexpr uint32_t HASH_RANDOM_MASK2 = 1653893711;
122
123 return ((((n1 ^ n2 ^ HASH_RANDOM_MASK2) << 8) + n1) ^ HASH_RANDOM_MASK) + n2;
124}
125
126/* Helper methods to read unaligned little-endian integers. These should be
127optimized to just simple, single reads on little-endian architecture like x86
128and x64. It may be different for SPARC (and in particular read_from_8(&some_u64)
129is not equal to some_u64's value. On SPARC the unaligned reads are prohibited,
130so for buffer that is not known to be aligned we will read data byte by byte.
131Note that the following methods and usages similar to what we do in
132hash_binary() could be applied to crc32 algorithm calculations and an early
133investigation showed 10-50% improvement in CRC32 speed for buffer lengths <=
134500. However, there was a lot of effort put into crc32 implementation to make
135sure it is efficient and generates correct, simple assembly for x64, ARM (and M1
136CPU) and other architectures. So in order to incorporate these ideas into CRC32
137a lot of testing is required to be performed again, with care. */
138
139static inline int64_t read_from_1(const byte *addr) { return *addr; }
140static inline int64_t read_from_2(const byte *addr) {
141 return read_from_1(addr) | (read_from_1(addr + 1) << 8);
142}
143static inline int64_t read_from_4(const byte *addr) {
144 return read_from_2(addr) | (read_from_2(addr + 2) << 16);
145}
146static inline int64_t read_from_8(const byte *addr) {
147 return read_from_4(addr) | (read_from_4(addr + 4) << 32);
148}
149
150/** Table for Tabulation Hashing, precomputed hash values for all byte values.
151The table could be for different amount of bits per chunk than 8, but 8 seems to
152yield faster hash calculation than 4 or 16bits - 4bits require 16 XOR operations
153and sets of bit manipulations, while 16bit require bigger tables that slow down
154caches. The indexes are set such that hash values for a specific byte value are
155stored in the same cache line (8 values * 8 bytes = 64B). This way it should be
156much faster and less demanding on CPU caches to calculate results for small
157integers (where most bytes are 0). */
158extern std::array<std::array<uint64_t, 8>, 256> tab_hash_lookup_table;
159
160} // namespace detail
161
162static inline uint64_t random_64() {
164 return detail::random_seed;
165}
166
167static inline uint64_t random_64_fast() {
168 /* Granularity of my_timer_cycles() might be over 1, to keep constant rate for
169 frequency changes of CPU core clocks. Drops lower 5 bits. */
170 const uint64_t res = my_timer_cycles();
171 return res != 0 ? res >> 5 : random_64();
172}
173
174template <uint64_t random_64_func()>
175static inline uint64_t random_from_interval_gen(uint64_t low, uint64_t high) {
176 ut_ad(high >= low);
177 return low + (random_64_func() % (high - low + 1));
178}
179
180static inline uint64_t random_from_interval(uint64_t low, uint64_t high) {
181 return random_from_interval_gen<random_64>(low, high);
182}
183
184static inline uint64_t random_from_interval_fast(uint64_t low, uint64_t high) {
185 return random_from_interval_gen<random_64_fast>(low, high);
186}
187
188static inline uint64_t hash_uint64(uint64_t value) {
189#ifndef CRC32_DEFAULT
191 return crc32_hash_uint64(value);
192 }
193#endif /* !CRC32_DEFAULT */
194
195 /* This implements Tabulation Hashing. It is a simple yet effective algorithm
196 that easily passes the unit tests that check distributions of hash values
197 modulo different numbers. Other techniques like one from hash_uint64_pair_fast
198 or extended 4-universal version did have some problems for specific modulo
199 numbers or have been providing 61bit hashes. */
200 uint64_t res = 0;
201 for (size_t i = 0; i < sizeof(uint64_t); ++i) {
202 res ^= detail::tab_hash_lookup_table[(value >> (i * 8)) & 0xFF][i];
203 }
204 return res;
205}
206
207static inline uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2) {
208 return hash_uint64(hash_uint64(n1) ^ n2);
209}
210
211static inline uint64_t hash_string(const char *str) {
212 return hash_binary(reinterpret_cast<const byte *>(str), strlen(str));
213}
214
215static inline uint64_t hash_binary(const byte *str, size_t len, uint64_t seed) {
216 /* Our implementation is slower than CRC32 while the CRC32 implementation is
217 super fast, especially for higher lengths as it leverages 3-fold parallelism
218 on CPU core pipeline level. */
219 if (len >= 15) {
220 /* The CRC32 is returning only 32bits of hash. We take out the first 8 bytes
221 to seed our 64bit hash just like we do when CRC32 is not used. */
222 const auto h = hash_uint64_pair(seed, detail::read_from_8(str));
223 return hash_uint64_pair(h, ut_crc32(str + 8, len - 8));
224 }
225
226 /* The following algorithm is based on idea of applying hash_uint64_pair for
227 64bit blocks of data. If there are less than 8 bytes remaining, we hash
228 remaining 4, 2 and 1 bytes for all input bytes have influence on result hash.
229 */
230
231 uint64_t h = seed;
232 size_t i = 0;
233 if (len & 8) {
235 i += 8;
236 }
237 uint64_t last_part = 0;
238 if ((len & 7) == 0) {
239 return h;
240 }
241 if (len & 4) {
242 last_part |= detail::read_from_4(str + i) << (16 + 8);
243 i += 4;
244 }
245 if (len & 2) {
246 last_part |= detail::read_from_2(str + i) << 8;
247 i += 2;
248 }
249 if (len & 1) {
250 last_part |= detail::read_from_1(str + i);
251 i += 1;
252 }
253 ut_ad(i == len);
254 return hash_uint64_pair(h, last_part);
255}
256
257static inline uint32_t hash_binary_ib(const byte *str, size_t len) {
258 uint32_t hash_value = 0;
259 const byte *str_end = str + (len & 0xFFFFFFF8);
260
261 ut_ad(str || !len);
262
263 while (str < str_end) {
264 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
265 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
266 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
267 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
268 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
269 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
270 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
271 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
272 }
273
274 switch (len & 0x7) {
275 case 7:
276 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
277 [[fallthrough]];
278 case 6:
279 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
280 [[fallthrough]];
281 case 5:
282 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
283 [[fallthrough]];
284 case 4:
285 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
286 [[fallthrough]];
287 case 3:
288 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
289 [[fallthrough]];
290 case 2:
291 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
292 [[fallthrough]];
293 case 1:
294 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
295 }
296
297 return (hash_value);
298}
299} // namespace ut
300
301#endif
ulonglong my_timer_cycles(void)
A cycle timer.
Definition: my_rdtsc.cc:97
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1065
Definition: buf0block_hint.cc:29
Definition: ut0tuple.h:56
constexpr uint32_t hash_uint32_pair_ib(uint32_t n1, uint32_t n2)
A helper method, it is used by hash_binary_ib for backward compatibility.
Definition: ut0rnd.h:119
static int64_t read_from_4(const byte *addr)
Definition: ut0rnd.h:143
static int64_t read_from_8(const byte *addr)
Definition: ut0rnd.h:146
thread_local uint64_t random_seed
Seed value of ut::random_64()
Definition: ut0rnd.cc:39
static int64_t read_from_1(const byte *addr)
Definition: ut0rnd.h:139
std::array< std::array< uint64_t, 8 >, 256 > tab_hash_lookup_table
Table for Tabulation Hashing, precomputed hash values for all byte values.
Definition: ut0rnd.cc:43
static int64_t read_from_2(const byte *addr)
Definition: ut0rnd.h:140
This file contains a set of libraries providing overloads for regular dynamic allocation routines whi...
Definition: aligned_alloc.h:47
static uint64_t random_from_interval(uint64_t low, uint64_t high)
Generates a pseudo-random integer from a given interval.
Definition: ut0rnd.h:180
static uint64_t hash_string(const char *str)
Hashes a character string ending in the null character.
Definition: ut0rnd.h:211
static uint64_t hash_uint64(uint64_t value)
Hashes a 64-bit integer.
Definition: ut0rnd.h:188
static uint32_t hash_binary_ib(const byte *str, size_t len)
Hashes a binary buffer of given length in the old innobase way.
Definition: ut0rnd.h:257
static uint64_t hash_binary(const byte *buf, size_t len, uint64_t seed=0xacb1f3526e25dd39)
Hashes a binary buffer of given length.
Definition: ut0rnd.h:215
static uint64_t random_64_fast()
The following function returns fine clock count as random value.
Definition: ut0rnd.h:167
static uint64_t random_64()
The following function generates pseudo-random 64bit integers which enumerate the value space generat...
Definition: ut0rnd.h:162
static uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2)
Hashes a pair of 64bit integers.
Definition: ut0rnd.h:207
static uint64_t random_from_interval_fast(uint64_t low, uint64_t high)
Generates a light-weight pseudo-random integer from a given interval.
Definition: ut0rnd.h:184
static uint64_t random_from_interval_gen(uint64_t low, uint64_t high)
Definition: ut0rnd.h:175
Version control for database, common definitions, and include files.
Utilities for byte operations.
Utilities related to class lifecycle.
CRC32 implementation.
bool ut_crc32_cpu_enabled
Flag that tells whether the CPU supports CRC32 or not.
Definition: crc32.cc:102
ut_crc32_func_t ut_crc32
Pointer to standard-compliant CRC32-C (using the GF(2) primitive polynomial 0x11EDC6F41) calculation ...
Definition: crc32.cc:100
#define ut_ad(EXPR)
Debug assertion.
Definition: ut0dbg.h:104
Math functions.
Implements a sequential lock structure for non-locking atomic read/write operations on a complex stru...