MySQL 8.1.0
Source Code Documentation
ut0rnd.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 1994, 2023, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is also distributed with certain software (including but not
10limited to OpenSSL) that is licensed under separate terms, as designated in a
11particular file or component or in included license documentation. The authors
12of MySQL hereby grant you an additional permission to link the program and
13your derivative works with the separately licensed software that they have
14included with MySQL.
15
16This program is distributed in the hope that it will be useful, but WITHOUT
17ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19for more details.
20
21You should have received a copy of the GNU General Public License along with
22this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25*****************************************************************************/
26
27/** @file include/ut0rnd.h
28 Random numbers and hashing
29
30 Created 1/20/1994 Heikki Tuuri
31 ***********************************************************************/
32
33#ifndef ut0rnd_h
34#define ut0rnd_h
35
36#include <atomic>
37#include <cstdint>
38
39#include "univ.i"
40#include "ut0byte.h"
41#include "ut0math.h"
42
43#include "ut0class_life_cycle.h"
44#include "ut0crc32.h"
45#include "ut0seq_lock.h"
46
47namespace ut {
48namespace detail {
49
50/* These are three randomly generated 64bit values, with additional constraint
51that a1 and a2 must be odd. These are used to perform hashing using algorithm
52described on https://sortingsearching.com/2020/05/21/hashing.html */
53
54constexpr uint64_t fast_hash_coeff_a1_64bit = 0xacb1f3526e25dd39;
55constexpr uint64_t fast_hash_coeff_a2_64bit = 0xd65b9175b5247ba3;
56constexpr uint64_t fast_hash_coeff_b_64bit = 0xf72a876a516b4b56;
57
58} // namespace detail
59
60/** The following function generates pseudo-random 64bit integers which
61enumerate the value space generated by a linear congruence. This is very similar
62to what can be achieved with std::linear_congruential_engine<uint64_t, ...>. If
63you need reliably good pseudo-random numbers for some reason, please consider
64using std::mersenne_twister_engine<>.
65@return a pseudo-random number */
66[[nodiscard]] static inline uint64_t random_64();
67
68/** The following function returns fine clock count as random value.
69This is used for cases which need performance more than the true randomness.
70It is not causing any loads or stores, so it is very CPU-cache-friendly.
71Even for very frequent use, it doesn't cause any CPU cache pollution.
72@return a pseudo-random number */
73[[nodiscard]] static inline uint64_t random_64_fast();
74
75/** Generates a pseudo-random integer from a given interval.
76@param[in] low low limit; can generate also this value
77@param[in] high high limit; can generate also this value
78@return the pseudo-random number within the [low, high] two-side inclusive range
79*/
80[[nodiscard]] static inline uint64_t random_from_interval(uint64_t low,
81 uint64_t high);
82
83/** Generates a light-weight pseudo-random integer from a given interval.
84This is used for the cases which need performance more than the true randomness.
85@param[in] low low limit; can generate also this value
86@param[in] high high limit; can generate also this value
87@return the pseudo-random number within the [low, high] two-side inclusive range
88*/
89[[nodiscard]] static inline uint64_t random_from_interval_fast(uint64_t low,
90 uint64_t high);
91
92/** Hashes a 64-bit integer.
93@param[in] value 64-bit integer
94@return hashed value */
95[[nodiscard]] static inline uint64_t hash_uint64(uint64_t value);
96
97/** Hashes a character string ending in the null character.
98 @return hashed value */
99[[nodiscard]] static inline uint64_t hash_string(const char *str);
100
101/** Hashes a pair of 64bit integers.
102@param[in] n1 first 64bit integer
103@param[in] n2 second 64bit integer
104@return hashed value */
105[[nodiscard]] static inline uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2);
106/** Hashes a binary buffer of given length.
107@param[in] buf buffer of bytes
108@param[in] len length
109@param[in] seed seed to be used in calculation. Can be previous value.
110@return hashed value */
111[[nodiscard]] static inline uint64_t hash_binary(
112 const byte *buf, size_t len,
113 uint64_t seed = detail::fast_hash_coeff_a1_64bit);
114
115/** Hashes a binary buffer of given length in the old innobase way. This is
116highly inefficient, don't use outside areas that require backward compatibility
117on data written to disk.
118@param[in] str buffer of bytes
119@param[in] len length
120@return hashed value */
121[[nodiscard]] static inline uint32_t hash_binary_ib(const byte *str,
122 size_t len);
123
124namespace detail {
125/** Seed value of ut::random_64() */
126extern thread_local uint64_t random_seed;
127
128/** A helper method, it is used by hash_binary_ib for backward compatibility.
129NOTE: Do not use this method, it produces results that are not hashed well.
130Especially for sequences of pairs of <i+n, j+n> over n. */
131constexpr uint32_t hash_uint32_pair_ib(uint32_t n1, uint32_t n2) {
132 constexpr uint32_t HASH_RANDOM_MASK = 1463735687;
133 constexpr uint32_t HASH_RANDOM_MASK2 = 1653893711;
134
135 return ((((n1 ^ n2 ^ HASH_RANDOM_MASK2) << 8) + n1) ^ HASH_RANDOM_MASK) + n2;
136}
137
138/* Helper methods to read unaligned little-endian integers. These should be
139optimized to just simple, single reads on little-endian architecture like x86
140and x64. It may be different for SPARC (and in particular read_from_8(&some_u64)
141is not equal to some_u64's value. On SPARC the unaligned reads are prohibited,
142so for buffer that is not known to be aligned we will read data byte by byte.
143Note that the following methods and usages similar to what we do in
144hash_binary() could be applied to crc32 algorithm calculations and an early
145investigation showed 10-50% improvement in CRC32 speed for buffer lengths <=
146500. However, there was a lot of effort put into crc32 implementation to make
147sure it is efficient and generates correct, simple assembly for x64, ARM (and M1
148CPU) and other architectures. So in order to incorporate these ideas into CRC32
149a lot of testing is required to be performed again, with care. */
150
151static inline int64_t read_from_1(const byte *addr) { return *addr; }
152static inline int64_t read_from_2(const byte *addr) {
153 return read_from_1(addr) | (read_from_1(addr + 1) << 8);
154}
155static inline int64_t read_from_4(const byte *addr) {
156 return read_from_2(addr) | (read_from_2(addr + 2) << 16);
157}
158static inline int64_t read_from_8(const byte *addr) {
159 return read_from_4(addr) | (read_from_4(addr + 4) << 32);
160}
161
162/** Calculates 64bit hash from two 64bit inputs. It is quite fast, but the
163quality of outputs is not best. The lower bits, the worse is their distribution.
164*/
165constexpr uint64_t hash_uint64_pair_fast(uint64_t n1, uint64_t n2) {
166 /* The following algorithm is very simple, yet proven to be quite good, that
167 is it is proven that for two different arguments it will yield different
168 results with 2^(-64) probability. This is based on algorithm for h3 function
169 for n=2, described on https://sortingsearching.com/2020/05/21/hashing.html
170 (which in turn references Ph.D. thesis of Philipp Wolfel. It does not
171 necessarily yield good distribution when consecutive integers are supplied as
172 inputs, especially if the results are taken modulo small numbers like 8 or 11.
173 */
177}
178
179/** Table for Tabulation Hashing, precomputed hash values for all byte values.
180The table could be for different amount of bits per chunk than 8, but 8 seems to
181yield faster hash calculation than 4 or 16bits - 4bits require 16 XOR operations
182and sets of bit manipulations, while 16bit require bigger tables that slow down
183caches. The indexes are set such that hash values for a specific byte value are
184stored in the same cache line (8 values * 8 bytes = 64B). This way it should be
185much faster and less demanding on CPU caches to calculate results for small
186integers (where most bytes are 0). */
187extern std::array<std::array<uint64_t, 8>, 256> tab_hash_lookup_table;
188
189} // namespace detail
190
191static inline uint64_t random_64() {
193 return detail::random_seed;
194}
195
196static inline uint64_t random_64_fast() {
197 /* Granularity of my_timer_cycles() might be over 1, to keep constant rate for
198 frequency changes of CPU core clocks. Drops lower 5 bits. */
199 const uint64_t res = my_timer_cycles();
200 return res != 0 ? res >> 5 : random_64();
201}
202
203template <uint64_t random_64_func()>
204static inline uint64_t random_from_interval_gen(uint64_t low, uint64_t high) {
205 ut_ad(high >= low);
206 return low + (random_64_func() % (high - low + 1));
207}
208
209static inline uint64_t random_from_interval(uint64_t low, uint64_t high) {
210 return random_from_interval_gen<random_64>(low, high);
211}
212
213static inline uint64_t random_from_interval_fast(uint64_t low, uint64_t high) {
214 return random_from_interval_gen<random_64_fast>(low, high);
215}
216
217static inline uint64_t hash_uint64(uint64_t value) {
218#ifndef CRC32_DEFAULT
220 return crc32_hash_uint64(value);
221 }
222#endif /* !CRC32_DEFAULT */
223
224 /* This implements Tabulation Hashing. It is a simple yet effective algorithm
225 that easily passes the unit tests that check distributions of hash values
226 modulo different numbers. Other techniques like one from hash_uint64_pair_fast
227 or extended 4-universal version did have some problems for specific modulo
228 numbers or have been providing 61bit hashes. */
229 uint64_t res = 0;
230 for (size_t i = 0; i < sizeof(uint64_t); ++i) {
231 res ^= detail::tab_hash_lookup_table[(value >> (i * 8)) & 0xFF][i];
232 }
233 return res;
234}
235
236static inline uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2) {
238}
239
240static inline uint64_t hash_string(const char *str) {
241 return hash_binary(reinterpret_cast<const byte *>(str), strlen(str));
242}
243
244static inline uint64_t hash_binary(const byte *str, size_t len, uint64_t seed) {
245 /* Our implementation is slower than CRC32 while the CRC32 implementation is
246 super fast, especially for higher lengths as it leverages 3-fold parallelism
247 on CPU core pipeline level. */
248 if (len >= 15) {
249 /* The CRC32 is returning only 32bits of hash. We take out the first 8 bytes
250 to seed our 64bit hash just like we do when CRC32 is not used. */
251 const auto h = hash_uint64_pair(seed, detail::read_from_8(str));
252 return hash_uint64_pair(h, ut_crc32(str + 8, len - 8));
253 }
254
255 /* The following algorithm is based on idea of applying hash_uint64_pair for
256 64bit blocks of data. If there are less than 8 bytes remaining, we hash
257 remaining 4, 2 and 1 bytes for all input bytes have influence on result hash.
258 */
259
260 uint64_t h = seed;
261 size_t i = 0;
262 if (len & 8) {
264 i += 8;
265 }
266 uint64_t last_part = 0;
267 if ((len & 7) == 0) {
268 return h;
269 }
270 if (len & 4) {
271 last_part |= detail::read_from_4(str + i) << (16 + 8);
272 i += 4;
273 }
274 if (len & 2) {
275 last_part |= detail::read_from_2(str + i) << 8;
276 i += 2;
277 }
278 if (len & 1) {
279 last_part |= detail::read_from_1(str + i);
280 i += 1;
281 }
282 ut_ad(i == len);
283 return hash_uint64_pair(h, last_part);
284}
285
286static inline uint32_t hash_binary_ib(const byte *str, size_t len) {
287 uint32_t hash_value = 0;
288 const byte *str_end = str + (len & 0xFFFFFFF8);
289
290 ut_ad(str || !len);
291
292 while (str < str_end) {
293 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
294 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
295 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
296 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
297 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
298 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
299 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
300 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
301 }
302
303 switch (len & 0x7) {
304 case 7:
305 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
306 [[fallthrough]];
307 case 6:
308 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
309 [[fallthrough]];
310 case 5:
311 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
312 [[fallthrough]];
313 case 4:
314 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
315 [[fallthrough]];
316 case 3:
317 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
318 [[fallthrough]];
319 case 2:
320 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
321 [[fallthrough]];
322 case 1:
323 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
324 }
325
326 return (hash_value);
327}
328} // namespace ut
329
330#endif
ulonglong my_timer_cycles(void)
A cycle timer.
Definition: my_rdtsc.cc:97
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1063
Definition: buf0block_hint.cc:29
Definition: ut0tuple.h:56
constexpr uint64_t fast_hash_coeff_b_64bit
Definition: ut0rnd.h:56
constexpr uint32_t hash_uint32_pair_ib(uint32_t n1, uint32_t n2)
A helper method, it is used by hash_binary_ib for backward compatibility.
Definition: ut0rnd.h:131
static int64_t read_from_4(const byte *addr)
Definition: ut0rnd.h:155
static int64_t read_from_8(const byte *addr)
Definition: ut0rnd.h:158
constexpr uint64_t fast_hash_coeff_a1_64bit
Definition: ut0rnd.h:54
thread_local uint64_t random_seed
Seed value of ut::random_64()
Definition: ut0rnd.cc:39
static int64_t read_from_1(const byte *addr)
Definition: ut0rnd.h:151
constexpr uint64_t fast_hash_coeff_a2_64bit
Definition: ut0rnd.h:55
std::array< std::array< uint64_t, 8 >, 256 > tab_hash_lookup_table
Table for Tabulation Hashing, precomputed hash values for all byte values.
Definition: ut0rnd.cc:43
static int64_t read_from_2(const byte *addr)
Definition: ut0rnd.h:152
constexpr uint64_t hash_uint64_pair_fast(uint64_t n1, uint64_t n2)
Calculates 64bit hash from two 64bit inputs.
Definition: ut0rnd.h:165
This file contains a set of libraries providing overloads for regular dynamic allocation routines whi...
Definition: aligned_alloc.h:47
static uint64_t random_from_interval(uint64_t low, uint64_t high)
Generates a pseudo-random integer from a given interval.
Definition: ut0rnd.h:209
static uint64_t hash_string(const char *str)
Hashes a character string ending in the null character.
Definition: ut0rnd.h:240
static uint64_t hash_binary(const byte *buf, size_t len, uint64_t seed=detail::fast_hash_coeff_a1_64bit)
Hashes a binary buffer of given length.
Definition: ut0rnd.h:244
static uint64_t hash_uint64(uint64_t value)
Hashes a 64-bit integer.
Definition: ut0rnd.h:217
static uint32_t hash_binary_ib(const byte *str, size_t len)
Hashes a binary buffer of given length in the old innobase way.
Definition: ut0rnd.h:286
static uint64_t random_64_fast()
The following function returns fine clock count as random value.
Definition: ut0rnd.h:196
static uint64_t random_64()
The following function generates pseudo-random 64bit integers which enumerate the value space generat...
Definition: ut0rnd.h:191
static uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2)
Hashes a pair of 64bit integers.
Definition: ut0rnd.h:236
static uint64_t random_from_interval_fast(uint64_t low, uint64_t high)
Generates a light-weight pseudo-random integer from a given interval.
Definition: ut0rnd.h:213
static uint64_t random_from_interval_gen(uint64_t low, uint64_t high)
Definition: ut0rnd.h:204
Version control for database, common definitions, and include files.
Utilities for byte operations.
Utilities related to class lifecycle.
CRC32 implementation.
bool ut_crc32_cpu_enabled
Flag that tells whether the CPU supports CRC32 or not.
Definition: crc32.cc:102
ut_crc32_func_t ut_crc32
Pointer to standard-compliant CRC32-C (using the GF(2) primitive polynomial 0x11EDC6F41) calculation ...
Definition: crc32.cc:100
#define ut_ad(EXPR)
Debug assertion.
Definition: ut0dbg.h:68
Math functions.
Implements a sequential lock structure for non-locking atomic read/write operations on a complex stru...