MySQL 9.1.0
Source Code Documentation
ut0rnd.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 1994, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ut0rnd.h
29 Random numbers and hashing
30
31 Created 1/20/1994 Heikki Tuuri
32 ***********************************************************************/
33
34#ifndef ut0rnd_h
35#define ut0rnd_h
36
37#include <atomic>
38#include <cstdint>
39
40#include "univ.i"
41#include "ut0byte.h"
42#include "ut0math.h"
43
44#include "ut0class_life_cycle.h"
45#include "ut0crc32.h"
46#include "ut0seq_lock.h"
47
48namespace ut {
49/** The following function generates pseudo-random 64bit integers which
50enumerate the value space generated by a linear congruence. This is very similar
51to what can be achieved with std::linear_congruential_engine<uint64_t, ...>. If
52you need reliably good pseudo-random numbers for some reason, please consider
53using std::mersenne_twister_engine<>.
54@return a pseudo-random number */
55[[nodiscard]] static inline uint64_t random_64();
56
57/** The following function returns fine clock count as random value.
58This is used for cases which need performance more than the true randomness.
59It is not causing any loads or stores, so it is very CPU-cache-friendly.
60Even for very frequent use, it doesn't cause any CPU cache pollution.
61@return a pseudo-random number */
62[[nodiscard]] static inline uint64_t random_64_fast();
63
64/** Generates a pseudo-random integer from a given interval.
65@param[in] low low limit; can generate also this value
66@param[in] high high limit; can generate also this value
67@return the pseudo-random number within the [low, high] two-side inclusive range
68*/
69[[nodiscard]] static inline uint64_t random_from_interval(uint64_t low,
70 uint64_t high);
71
72/** Generates a light-weight pseudo-random integer from a given interval.
73This is used for the cases which need performance more than the true randomness.
74@param[in] low low limit; can generate also this value
75@param[in] high high limit; can generate also this value
76@return the pseudo-random number within the [low, high] two-side inclusive range
77*/
78[[nodiscard]] static inline uint64_t random_from_interval_fast(uint64_t low,
79 uint64_t high);
80
81/** Hashes a 64-bit integer.
82@param[in] value 64-bit integer
83@return hashed value */
84[[nodiscard]] static inline uint64_t hash_uint64(uint64_t value);
85
86/** Hashes a character string ending in the null character.
87 @return hashed value */
88[[nodiscard]] static inline uint64_t hash_string(const char *str);
89
90/** Hashes a pair of 64bit integers.
91@param[in] n1 first 64bit integer
92@param[in] n2 second 64bit integer
93@return hashed value */
94[[nodiscard]] static inline uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2);
95/** Hashes a binary buffer of given length.
96@param[in] buf buffer of bytes
97@param[in] len length
98@param[in] seed seed to be used in calculation. Can be previous value. A
99 default value is just a randomly chosen number.
100@return hashed value */
101[[nodiscard]] static inline uint64_t hash_binary(
102 const byte *buf, size_t len, uint64_t seed = 0xacb1f3526e25dd39);
103
104/** Hashes a binary buffer of given length in the old innobase way. This is
105highly inefficient, don't use outside areas that require backward compatibility
106on data written to disk.
107@param[in] str buffer of bytes
108@param[in] len length
109@return hashed value */
110[[nodiscard]] static inline uint32_t hash_binary_ib(const byte *str,
111 size_t len);
112
113namespace detail {
114/** Seed value of ut::random_64() */
115extern thread_local uint64_t random_seed;
116
117/** A helper method, it is used by hash_binary_ib for backward compatibility.
118NOTE: Do not use this method, it produces results that are not hashed well.
119Especially for sequences of pairs of <i+n, j+n> over n. */
120constexpr uint32_t hash_uint32_pair_ib(uint32_t n1, uint32_t n2) {
121 constexpr uint32_t HASH_RANDOM_MASK = 1463735687;
122 constexpr uint32_t HASH_RANDOM_MASK2 = 1653893711;
123
124 return ((((n1 ^ n2 ^ HASH_RANDOM_MASK2) << 8) + n1) ^ HASH_RANDOM_MASK) + n2;
125}
126
127/* Helper methods to read unaligned little-endian integers. These should be
128optimized to just simple, single reads on little-endian architecture like x86
129and x64. It may be different for SPARC (and in particular read_from_8(&some_u64)
130is not equal to some_u64's value. On SPARC the unaligned reads are prohibited,
131so for buffer that is not known to be aligned we will read data byte by byte.
132Note that the following methods and usages similar to what we do in
133hash_binary() could be applied to crc32 algorithm calculations and an early
134investigation showed 10-50% improvement in CRC32 speed for buffer lengths <=
135500. However, there was a lot of effort put into crc32 implementation to make
136sure it is efficient and generates correct, simple assembly for x64, ARM (and M1
137CPU) and other architectures. So in order to incorporate these ideas into CRC32
138a lot of testing is required to be performed again, with care. */
139
140static inline int64_t read_from_1(const byte *addr) { return *addr; }
141static inline int64_t read_from_2(const byte *addr) {
142 return read_from_1(addr) | (read_from_1(addr + 1) << 8);
143}
144static inline int64_t read_from_4(const byte *addr) {
145 return read_from_2(addr) | (read_from_2(addr + 2) << 16);
146}
147static inline int64_t read_from_8(const byte *addr) {
148 return read_from_4(addr) | (read_from_4(addr + 4) << 32);
149}
150
151/** Table for Tabulation Hashing, precomputed hash values for all byte values.
152The table could be for different amount of bits per chunk than 8, but 8 seems to
153yield faster hash calculation than 4 or 16bits - 4bits require 16 XOR operations
154and sets of bit manipulations, while 16bit require bigger tables that slow down
155caches. The indexes are set such that hash values for a specific byte value are
156stored in the same cache line (8 values * 8 bytes = 64B). This way it should be
157much faster and less demanding on CPU caches to calculate results for small
158integers (where most bytes are 0). */
159extern std::array<std::array<uint64_t, 8>, 256> tab_hash_lookup_table;
160
161} // namespace detail
162
163static inline uint64_t random_64() {
165 return detail::random_seed;
166}
167
168static inline uint64_t random_64_fast() {
169 /* Granularity of my_timer_cycles() might be over 1, to keep constant rate for
170 frequency changes of CPU core clocks. Drops lower 5 bits. */
171 const uint64_t res = my_timer_cycles();
172 return res != 0 ? res >> 5 : random_64();
173}
174
175template <uint64_t random_64_func()>
176static inline uint64_t random_from_interval_gen(uint64_t low, uint64_t high) {
177 ut_ad(high >= low);
178 return low + (random_64_func() % (high - low + 1));
179}
180
181static inline uint64_t random_from_interval(uint64_t low, uint64_t high) {
182 return random_from_interval_gen<random_64>(low, high);
183}
184
185static inline uint64_t random_from_interval_fast(uint64_t low, uint64_t high) {
186 return random_from_interval_gen<random_64_fast>(low, high);
187}
188
189static inline uint64_t hash_uint64(uint64_t value) {
190#ifndef CRC32_DEFAULT
192 return crc32_hash_uint64(value);
193 }
194#endif /* !CRC32_DEFAULT */
195
196 /* This implements Tabulation Hashing. It is a simple yet effective algorithm
197 that easily passes the unit tests that check distributions of hash values
198 modulo different numbers. Other techniques like one from hash_uint64_pair_fast
199 or extended 4-universal version did have some problems for specific modulo
200 numbers or have been providing 61bit hashes. */
201 uint64_t res = 0;
202 for (size_t i = 0; i < sizeof(uint64_t); ++i) {
203 res ^= detail::tab_hash_lookup_table[(value >> (i * 8)) & 0xFF][i];
204 }
205 return res;
206}
207
208static inline uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2) {
209 return hash_uint64(hash_uint64(n1) ^ n2);
210}
211
212static inline uint64_t hash_string(const char *str) {
213 return hash_binary(reinterpret_cast<const byte *>(str), strlen(str));
214}
215
216static inline uint64_t hash_binary(const byte *str, size_t len, uint64_t seed) {
217 /* Our implementation is slower than CRC32 while the CRC32 implementation is
218 super fast, especially for higher lengths as it leverages 3-fold parallelism
219 on CPU core pipeline level. */
220 if (len >= 15) {
221 /* The CRC32 is returning only 32bits of hash. We take out the first 8 bytes
222 to seed our 64bit hash just like we do when CRC32 is not used. */
223 const auto h = hash_uint64_pair(seed, detail::read_from_8(str));
224 return hash_uint64_pair(h, ut_crc32(str + 8, len - 8));
225 }
226
227 /* The following algorithm is based on idea of applying hash_uint64_pair for
228 64bit blocks of data. If there are less than 8 bytes remaining, we hash
229 remaining 4, 2 and 1 bytes for all input bytes have influence on result hash.
230 */
231
232 uint64_t h = seed;
233 size_t i = 0;
234 if (len & 8) {
236 i += 8;
237 }
238 uint64_t last_part = 0;
239 if ((len & 7) == 0) {
240 return h;
241 }
242 if (len & 4) {
243 last_part |= detail::read_from_4(str + i) << (16 + 8);
244 i += 4;
245 }
246 if (len & 2) {
247 last_part |= detail::read_from_2(str + i) << 8;
248 i += 2;
249 }
250 if (len & 1) {
251 last_part |= detail::read_from_1(str + i);
252 i += 1;
253 }
254 ut_ad(i == len);
255 return hash_uint64_pair(h, last_part);
256}
257
258static inline uint32_t hash_binary_ib(const byte *str, size_t len) {
259 uint32_t hash_value = 0;
260 const byte *str_end = str + (len & 0xFFFFFFF8);
261
262 ut_ad(str || !len);
263
264 while (str < str_end) {
265 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
266 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
267 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
268 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
269 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
270 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
271 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
272 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
273 }
274
275 switch (len & 0x7) {
276 case 7:
277 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
278 [[fallthrough]];
279 case 6:
280 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
281 [[fallthrough]];
282 case 5:
283 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
284 [[fallthrough]];
285 case 4:
286 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
287 [[fallthrough]];
288 case 3:
289 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
290 [[fallthrough]];
291 case 2:
292 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
293 [[fallthrough]];
294 case 1:
295 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
296 }
297
298 return (hash_value);
299}
300} // namespace ut
301
302#endif
ulonglong my_timer_cycles(void)
A cycle timer.
Definition: my_rdtsc.cc:98
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1105
Definition: buf0block_hint.cc:30
Definition: ut0tuple.h:57
constexpr uint32_t hash_uint32_pair_ib(uint32_t n1, uint32_t n2)
A helper method, it is used by hash_binary_ib for backward compatibility.
Definition: ut0rnd.h:120
static int64_t read_from_4(const byte *addr)
Definition: ut0rnd.h:144
static int64_t read_from_8(const byte *addr)
Definition: ut0rnd.h:147
thread_local uint64_t random_seed
Seed value of ut::random_64()
Definition: ut0rnd.cc:40
static int64_t read_from_1(const byte *addr)
Definition: ut0rnd.h:140
std::array< std::array< uint64_t, 8 >, 256 > tab_hash_lookup_table
Table for Tabulation Hashing, precomputed hash values for all byte values.
Definition: ut0rnd.cc:44
static int64_t read_from_2(const byte *addr)
Definition: ut0rnd.h:141
This file contains a set of libraries providing overloads for regular dynamic allocation routines whi...
Definition: aligned_alloc.h:48
static uint64_t random_from_interval(uint64_t low, uint64_t high)
Generates a pseudo-random integer from a given interval.
Definition: ut0rnd.h:181
static uint64_t hash_string(const char *str)
Hashes a character string ending in the null character.
Definition: ut0rnd.h:212
static uint64_t hash_uint64(uint64_t value)
Hashes a 64-bit integer.
Definition: ut0rnd.h:189
static uint32_t hash_binary_ib(const byte *str, size_t len)
Hashes a binary buffer of given length in the old innobase way.
Definition: ut0rnd.h:258
static uint64_t hash_binary(const byte *buf, size_t len, uint64_t seed=0xacb1f3526e25dd39)
Hashes a binary buffer of given length.
Definition: ut0rnd.h:216
static uint64_t random_64_fast()
The following function returns fine clock count as random value.
Definition: ut0rnd.h:168
static uint64_t random_64()
The following function generates pseudo-random 64bit integers which enumerate the value space generat...
Definition: ut0rnd.h:163
static uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2)
Hashes a pair of 64bit integers.
Definition: ut0rnd.h:208
static uint64_t random_from_interval_fast(uint64_t low, uint64_t high)
Generates a light-weight pseudo-random integer from a given interval.
Definition: ut0rnd.h:185
static uint64_t random_from_interval_gen(uint64_t low, uint64_t high)
Definition: ut0rnd.h:176
Version control for database, common definitions, and include files.
Utilities for byte operations.
Utilities related to class lifecycle.
CRC32 implementation.
bool ut_crc32_cpu_enabled
Flag that tells whether the CPU supports CRC32 or not.
Definition: crc32.cc:103
ut_crc32_func_t ut_crc32
Pointer to standard-compliant CRC32-C (using the GF(2) primitive polynomial 0x11EDC6F41) calculation ...
Definition: crc32.cc:101
#define ut_ad(EXPR)
Debug assertion.
Definition: ut0dbg.h:105
Math functions.
Implements a sequential lock structure for non-locking atomic read/write operations on a complex stru...