MySQL 8.0.33
Source Code Documentation
ut0rnd.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 1994, 2023, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is also distributed with certain software (including but not
10limited to OpenSSL) that is licensed under separate terms, as designated in a
11particular file or component or in included license documentation. The authors
12of MySQL hereby grant you an additional permission to link the program and
13your derivative works with the separately licensed software that they have
14included with MySQL.
15
16This program is distributed in the hope that it will be useful, but WITHOUT
17ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19for more details.
20
21You should have received a copy of the GNU General Public License along with
22this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25*****************************************************************************/
26
27/** @file include/ut0rnd.h
28 Random numbers and hashing
29
30 Created 1/20/1994 Heikki Tuuri
31 ***********************************************************************/
32
33#ifndef ut0rnd_h
34#define ut0rnd_h
35
36#include <atomic>
37#include <cstdint>
38
39#include "univ.i"
40#include "ut0byte.h"
41#include "ut0math.h"
42
43#include "ut0class_life_cycle.h"
44#include "ut0crc32.h"
45#include "ut0seq_lock.h"
46
47namespace ut {
48namespace detail {
49
50/* These are three randomly generated 64bit values, with additional constraint
51that a1 and a2 must be odd. These are used to perform hashing using algorithm
52described on https://sortingsearching.com/2020/05/21/hashing.html */
53
54constexpr uint64_t fast_hash_coeff_a1_64bit = 0xacb1f3526e25dd39;
55constexpr uint64_t fast_hash_coeff_a2_64bit = 0xd65b9175b5247ba3;
56constexpr uint64_t fast_hash_coeff_b_64bit = 0xf72a876a516b4b56;
57
58} // namespace detail
59
60/** The following function generates pseudo-random 64bit integers which
61enumerate the value space generated by a linear congruence. This is very similar
62to what can be achieved with std::linear_congruential_engine<uint64_t, ...>. If
63you need reliably good pseudo-random numbers for some reason, please consider
64using std::mersenne_twister_engine<>.
65@return a pseudo-random number */
66[[nodiscard]] static inline uint64_t random_64();
67
68/** Generates a pseudo-random integer from a given interval.
69@param[in] low low limit; can generate also this value
70@param[in] high high limit; can generate also this value
71@return the pseudo-random number within the [low, high] two-side inclusive range
72*/
73[[nodiscard]] static inline uint64_t random_from_interval(uint64_t low,
74 uint64_t high);
75
76/** Hashes a 64-bit integer.
77@param[in] value 64-bit integer
78@return hashed value */
79[[nodiscard]] static inline uint64_t hash_uint64(uint64_t value);
80
81/** Hashes a character string ending in the null character.
82 @return hashed value */
83[[nodiscard]] static inline uint64_t hash_string(const char *str);
84
85/** Hashes a pair of 64bit integers.
86@param[in] n1 first 64bit integer
87@param[in] n2 second 64bit integer
88@return hashed value */
89[[nodiscard]] static inline uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2);
90/** Hashes a binary buffer of given length.
91@param[in] buf buffer of bytes
92@param[in] len length
93@param[in] seed seed to be used in calculation. Can be previous value.
94@return hashed value */
95[[nodiscard]] static inline uint64_t hash_binary(
96 const byte *buf, size_t len,
97 uint64_t seed = detail::fast_hash_coeff_a1_64bit);
98
99/** Hashes a binary buffer of given length in the old innobase way. This is
100highly inefficient, don't use outside areas that require backward compatibility
101on data written to disk.
102@param[in] str buffer of bytes
103@param[in] len length
104@return hashed value */
105[[nodiscard]] static inline uint32_t hash_binary_ib(const byte *str,
106 size_t len);
107
108namespace detail {
109/** Seed value of ut::random_64() */
110extern thread_local uint64_t random_seed;
111
112/** A helper method, it is used by hash_binary_ib for backward compatibility.
113NOTE: Do not use this method, it produces results that are not hashed well.
114Especially for sequences of pairs of <i+n, j+n> over n. */
115constexpr uint32_t hash_uint32_pair_ib(uint32_t n1, uint32_t n2) {
116 constexpr uint32_t HASH_RANDOM_MASK = 1463735687;
117 constexpr uint32_t HASH_RANDOM_MASK2 = 1653893711;
118
119 return ((((n1 ^ n2 ^ HASH_RANDOM_MASK2) << 8) + n1) ^ HASH_RANDOM_MASK) + n2;
120}
121
122/* Helper methods to read unaligned little-endian integers. These should be
123optimized to just simple, single reads on little-endian architecture like x86
124and x64. It may be different for SPARC (and in particular read_from_8(&some_u64)
125is not equal to some_u64's value. On SPARC the unaligned reads are prohibited,
126so for buffer that is not known to be aligned we will read data byte by byte.
127Note that the following methods and usages similar to what we do in
128hash_binary() could be applied to crc32 algorithm calculations and an early
129investigation showed 10-50% improvement in CRC32 speed for buffer lengths <=
130500. However, there was a lot of effort put into crc32 implementation to make
131sure it is efficient and generates correct, simple assembly for x64, ARM (and M1
132CPU) and other architectures. So in order to incorporate these ideas into CRC32
133a lot of testing is required to be performed again, with care. */
134
135static inline int64_t read_from_1(const byte *addr) { return *addr; }
136static inline int64_t read_from_2(const byte *addr) {
137 return read_from_1(addr) | (read_from_1(addr + 1) << 8);
138}
139static inline int64_t read_from_4(const byte *addr) {
140 return read_from_2(addr) | (read_from_2(addr + 2) << 16);
141}
142static inline int64_t read_from_8(const byte *addr) {
143 return read_from_4(addr) | (read_from_4(addr + 4) << 32);
144}
145
146/** Calculates 64bit hash from two 64bit inputs. It is quite fast, but the
147quality of outputs is not best. The lower bits, the worse is their distribution.
148*/
149constexpr uint64_t hash_uint64_pair_fast(uint64_t n1, uint64_t n2) {
150 /* The following algorithm is very simple, yet proven to be quite good, that
151 is it is proven that for two different arguments it will yield different
152 results with 2^(-64) probability. This is based on algorithm for h3 function
153 for n=2, described on https://sortingsearching.com/2020/05/21/hashing.html
154 (which in turn references Ph.D. thesis of Philipp Wolfel. It does not
155 necessarily yield good distribution when consecutive integers are supplied as
156 inputs, especially if the results are taken modulo small numbers like 8 or 11.
157 */
161}
162
163/** Table for Tabulation Hashing, precomputed hash values for all byte values.
164The table could be for different amount of bits per chunk than 8, but 8 seems to
165yield faster hash calculation than 4 or 16bits - 4bits require 16 XOR operations
166and sets of bit manipulations, while 16bit require bigger tables that slow down
167caches. The indexes are set such that hash values for a specific byte value are
168stored in the same cache line (8 values * 8 bytes = 64B). This way it should be
169much faster and less demanding on CPU caches to calculate results for small
170integers (where most bytes are 0). */
171extern std::array<std::array<uint64_t, 8>, 256> tab_hash_lookup_table;
172
173} // namespace detail
174
175static inline uint64_t random_64() {
177}
178
179static inline uint64_t random_from_interval(uint64_t low, uint64_t high) {
180 ut_ad(high >= low);
181
182 return low + (random_64() % (high - low + 1));
183}
184
185static inline uint64_t hash_uint64(uint64_t value) {
186 /* This implements Tabulation Hashing. It is a simple yet effective algorithm
187 that easily passes the unit tests that check distributions of hash values
188 modulo different numbers. Other techniques like one from hash_uint64_pair_fast
189 or extended 4-universal version did have some problems for specific modulo
190 numbers or have been providing 61bit hashes. */
191 uint64_t res = 0;
192 for (size_t i = 0; i < sizeof(uint64_t); ++i) {
193 res ^= detail::tab_hash_lookup_table[(value >> (i * 8)) & 0xFF][i];
194 }
195 return res;
196}
197
198static inline uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2) {
200}
201
202static inline uint64_t hash_string(const char *str) {
203 return hash_binary(reinterpret_cast<const byte *>(str), strlen(str));
204}
205
206static inline uint64_t hash_binary(const byte *str, size_t len, uint64_t seed) {
207 /* Our implementation is slower than CRC32 while the CRC32 implementation is
208 super fast, especially for higher lengths as it leverages 3-fold parallelism
209 on CPU core pipeline level. */
210 if (len >= 15) {
211 /* The CRC32 is returning only 32bits of hash. We take out the first 8 bytes
212 to seed our 64bit hash just like we do when CRC32 is not used. */
213 const auto h = hash_uint64_pair(seed, detail::read_from_8(str));
214 return hash_uint64_pair(h, ut_crc32(str + 8, len - 8));
215 }
216
217 /* The following algorithm is based on idea of applying hash_uint64_pair for
218 64bit blocks of data. If there are less than 8 bytes remaining, we hash
219 remaining 4, 2 and 1 bytes for all input bytes have influence on result hash.
220 */
221
222 uint64_t h = seed;
223 size_t i = 0;
224 if (len & 8) {
226 i += 8;
227 }
228 uint64_t last_part = 0;
229 if ((len & 7) == 0) {
230 return h;
231 }
232 if (len & 4) {
233 last_part |= detail::read_from_4(str + i) << (16 + 8);
234 i += 4;
235 }
236 if (len & 2) {
237 last_part |= detail::read_from_2(str + i) << 8;
238 i += 2;
239 }
240 if (len & 1) {
241 last_part |= detail::read_from_1(str + i);
242 i += 1;
243 }
244 ut_ad(i == len);
245 return hash_uint64_pair(h, last_part);
246}
247
248static inline uint32_t hash_binary_ib(const byte *str, size_t len) {
249 uint32_t hash_value = 0;
250 const byte *str_end = str + (len & 0xFFFFFFF8);
251
252 ut_ad(str || !len);
253
254 while (str < str_end) {
255 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
256 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
257 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
258 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
259 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
260 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
261 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
262 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
263 }
264
265 switch (len & 0x7) {
266 case 7:
267 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
268 [[fallthrough]];
269 case 6:
270 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
271 [[fallthrough]];
272 case 5:
273 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
274 [[fallthrough]];
275 case 4:
276 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
277 [[fallthrough]];
278 case 3:
279 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
280 [[fallthrough]];
281 case 2:
282 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
283 [[fallthrough]];
284 case 1:
285 hash_value = detail::hash_uint32_pair_ib(hash_value, *str++);
286 }
287
288 return (hash_value);
289}
290} // namespace ut
291
292#endif
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1054
Definition: buf0block_hint.cc:29
Definition: ut0tuple.h:56
constexpr uint64_t fast_hash_coeff_b_64bit
Definition: ut0rnd.h:56
constexpr uint32_t hash_uint32_pair_ib(uint32_t n1, uint32_t n2)
A helper method, it is used by hash_binary_ib for backward compatibility.
Definition: ut0rnd.h:115
static int64_t read_from_4(const byte *addr)
Definition: ut0rnd.h:139
static int64_t read_from_8(const byte *addr)
Definition: ut0rnd.h:142
constexpr uint64_t fast_hash_coeff_a1_64bit
Definition: ut0rnd.h:54
thread_local uint64_t random_seed
Seed value of ut::random_64()
Definition: ut0rnd.cc:38
static int64_t read_from_1(const byte *addr)
Definition: ut0rnd.h:135
constexpr uint64_t fast_hash_coeff_a2_64bit
Definition: ut0rnd.h:55
std::array< std::array< uint64_t, 8 >, 256 > tab_hash_lookup_table
Table for Tabulation Hashing, precomputed hash values for all byte values.
Definition: ut0rnd.cc:42
static int64_t read_from_2(const byte *addr)
Definition: ut0rnd.h:136
constexpr uint64_t hash_uint64_pair_fast(uint64_t n1, uint64_t n2)
Calculates 64bit hash from two 64bit inputs.
Definition: ut0rnd.h:149
This file contains a set of libraries providing overloads for regular dynamic allocation routines whi...
Definition: aligned_alloc.h:47
static uint64_t random_from_interval(uint64_t low, uint64_t high)
Generates a pseudo-random integer from a given interval.
Definition: ut0rnd.h:179
static uint64_t hash_string(const char *str)
Hashes a character string ending in the null character.
Definition: ut0rnd.h:202
static uint64_t hash_binary(const byte *buf, size_t len, uint64_t seed=detail::fast_hash_coeff_a1_64bit)
Hashes a binary buffer of given length.
Definition: ut0rnd.h:206
static uint64_t hash_uint64(uint64_t value)
Hashes a 64-bit integer.
Definition: ut0rnd.h:185
static uint32_t hash_binary_ib(const byte *str, size_t len)
Hashes a binary buffer of given length in the old innobase way.
Definition: ut0rnd.h:248
static uint64_t random_64()
The following function generates pseudo-random 64bit integers which enumerate the value space generat...
Definition: ut0rnd.h:175
static uint64_t hash_uint64_pair(uint64_t n1, uint64_t n2)
Hashes a pair of 64bit integers.
Definition: ut0rnd.h:198
Version control for database, common definitions, and include files.
Utilities for byte operations.
Utilities related to class lifecycle.
CRC32 implementation.
ut_crc32_func_t ut_crc32
Pointer to standard-compliant CRC32-C (using the GF(2) primitive polynomial 0x11EDC6F41) calculation ...
Definition: crc32.cc:128
#define ut_ad(EXPR)
Debug assertion.
Definition: ut0dbg.h:68
Math functions.
Implements a sequential lock structure for non-locking atomic read/write operations on a complex stru...