MySQL 8.0.40
Source Code Documentation
ut0math.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2021, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ut0math.h
29 Math functions.
30
31 ***********************************************************************/
32
33#ifndef ut0math_h
34#define ut0math_h
35
36#include <atomic>
37#include <cstdint>
38#include "ut0class_life_cycle.h"
39#include "ut0dbg.h"
40#include "ut0seq_lock.h"
41
42namespace ut {
43/** Portable replacement for std::countr_zero(uint64_t) from C++20
44@param x the number you wish to count least significant zeros of
45@return number of least significant zeros, which can be between 0 and 64
46inclusive. */
47static inline int countr_zero(uint64_t x) {
48 /* ~ changes trailing 0s to 1s, and the least significant 1 to 0
49 +1 causes 1s to flip to 0s again, but carry over changes 0 to 1
50 x& will not match anywhere except that least significant 1
51 Note: it could be entirely missing if x==0, which is fine. */
52 x &= ~x + 1;
53 /* The bit sequence of this constant is such, that each 6-bit window
54 of it (with wrap-around) is different. It is an Euler's cycle in the
55 graph with 32 5-bit nodes, and 64 1-bit edges, found by:
56 void dfs(int n){
57 for(int i=0;i<2;++i){
58 if(!passed[n][i]){
59 passed[n][i]=true;
60 dfs(((n<<1)|i)&31); // add i to node label, drop highest bit
61 backtrack.push_back(i); // edge label
62 }
63 }
64 }
65 Of all rotations of such cycle we pick starting with 6 zeros. This
66 way each shift of this constant has a different topmost 6 bits. */
67 constexpr uint64_t de_brujin_sequence = 151050438420815295u;
68 /* should fit one cache line in data section */
69 constexpr static uint8_t ans[64] = {
70 0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 34, 20, 40,
71 5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
72 63, 6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
73 62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58,
74 };
75 return x == 0 ? 64 : ans[(x * de_brujin_sequence >> (64 - 6)) & 63];
76}
77
78/** Computes the result of division rounded towards positive infinity.
79@param[in] numerator The number you want to be divided
80@param[in] denominator The number you want to divide by
81@return ceil(numerator/denominator). */
82template <typename T>
83constexpr T div_ceil(T numerator, T denominator) {
84 static_assert(std::is_integral_v<T>, "div_ceil<T> needs integral T");
85 /* see https://gist.github.com/Eisenwave/2a7d7a4e74e99bbb513984107a6c63ef
86 for list of common pitfalls, and this beautiful solution which compiles to
87 - branchless code with one division operation for unsigned ints,
88 - branchless (but longer) code with one division operation for signed ints,
89 - branchless code with just shifts and adds for constant d=constexpr 2^k,
90 - branchless code with multiplication instead of division for constexpr d
91 All that correctly handling negative numerators, denominators, and values
92 close to or equal to the max() or min(). */
93 const bool quotient_not_negative{(numerator < 0) == (denominator < 0)};
94 return numerator / denominator +
95 (quotient_not_negative && numerator % denominator != 0);
96}
97
98/** Calculates the 128bit result of multiplication of the two specified 64bit
99integers. May use CPU native instructions for speed of standard uint64_t
100multiplication.
101@param[in] x First number to multiply.
102@param[in] y Second number to multiply.
103@param[out] hi A reference to 64bit integer that will store higher 64bits of the
104result.
105@return The lower 64bit of the result. */
106[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
107 uint64_t &hi);
108
109/*Calculates the 64bit result of division of the specified 128bit integer by the
110specified 64bit integer. The result must fit in 64bit or else the behavior is
111undefined. Currently does not use native CPU instructions and can be quite slow.
112@param[in] high High 64bits of the number to divide.
113@param[in] low Low 64bits of the number to divide.
114@param[in] div The number to divide by.
115@return The lower 64bit of the result. */
116[[nodiscard]] static inline uint64_t divide_128(uint64_t high, uint64_t low,
117 uint64_t div);
118class fast_modulo_t;
119
120/** Looks for a prime number slightly greater than the given argument.
121The prime is chosen so that it is not near any power of 2.
122@param[in] n positive number > 100
123@return prime */
124[[nodiscard]] uint64_t find_prime(uint64_t n);
125
126namespace detail {
127/** Calculates the 128bit result of multiplication of the two specified 64bit
128integers.
129@param[in] x First number to multiply.
130@param[in] y Second number to multiply.
131@param[out] hi A reference to 64bit integer that will store higher 64bits of the
132result.
133@return The lower 64bit of the result. */
134[[nodiscard]] constexpr uint64_t multiply_uint64_portable(uint64_t x,
135 uint64_t y,
136 uint64_t &hi) {
137 uint32_t x_hi = static_cast<uint32_t>(x >> 32);
138 uint32_t x_lo = static_cast<uint32_t>(x);
139 uint32_t y_hi = static_cast<uint32_t>(y >> 32);
140 uint32_t y_lo = static_cast<uint32_t>(y);
141
142 uint64_t hi_lo = static_cast<uint64_t>(x_hi) * y_lo;
143
144 uint64_t low = static_cast<uint64_t>(x_lo) * y_lo;
145 /* This will not overflow, as (2^32 -1)^2 = 2^64 - 1 - 2 * 2^32, so there is
146 still a place for two 32bit integers to be added. */
147 uint64_t mid = (low >> 32) + static_cast<uint64_t>(x_lo) * y_hi +
148 static_cast<uint32_t>(hi_lo);
149 hi = (mid >> 32) + static_cast<uint64_t>(x_hi) * y_hi + (hi_lo >> 32);
150 return static_cast<uint32_t>(low) + (mid << 32);
151}
152} // namespace detail
153
154#if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
155/* MSVC x86 supports native uint64_t -> uint128_t multiplication */
156#include <intrin.h>
157#pragma intrinsic(_umul128)
158[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
159 uint64_t &hi) {
160 return _umul128(x, y, &hi);
161}
162#elif defined(__SIZEOF_INT128__)
163/* Compiler supports 128-bit values (GCC/Clang) */
164
165[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
166 uint64_t &hi) {
167 unsigned __int128 res = (unsigned __int128)x * y;
168 hi = static_cast<uint64_t>(res >> 64);
169 return static_cast<uint64_t>(res);
170}
171#else
172[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
173 uint64_t &hi) {
174 return detail::multiply_uint64_portable(x, y, hi);
175}
176#endif
177
178[[nodiscard]] static inline uint64_t divide_128(uint64_t high, uint64_t low,
179 uint64_t div) {
180 uint64_t res = 0;
181 for (auto current_bit = 63; current_bit >= 0; current_bit--) {
182 const auto div_hi = current_bit ? (div >> (64 - current_bit)) : 0;
183 const auto div_lo = div << current_bit;
184 if (div_hi < high || (div_hi == high && div_lo <= low)) {
185 high -= div_hi;
186 if (low < div_lo) {
187 high--;
188 }
189 low -= div_lo;
190 res += 1ULL << current_bit;
191 }
192 }
193 return res;
194}
195
196/** Allows to execute x % mod for a specified mod in a fast way, without using a
197slow operation of division. The additional cost is hidden in constructor to
198preprocess the mod constant. */
200 /* Idea behind this implementation is following: (division sign in all
201 equations below is to be treated as mathematical division on reals)
202
203 x % mod = x - floor(x/mod)*mod
204
205 and...
206
207 x / mod = x * 1/mod = (x * (BIG/mod)) /BIG
208
209 and..
210
211 floor(x/mod) = x / mod - epsilon, where 0<=epsilon<1
212
213 Now, lets define:
214
215 M = floor(BIG/mod)
216
217 And take a look at the value of following expression:
218
219 floor( x*M / BIG) * mod =
220
221 floor(x * floor(BIG/mod) / BIG) * mod =
222 floor(x * ((BIG/mod)-epsilon1) / BIG) * mod =
223 ((x*((BIG/mod)-epsilon1)/BIG - epsilon2) * mod
224
225 This sure looks ugly, but it has interesting properties:
226 (1) is divisible by mod, which you can see, because it has a form (...)*
227 mod
228 (2) is smaller or equal to x, which you can see by setting epsilons to 0
229 (3) assuming BIG>x, the expression is strictly larger than x - 2*mod,
230 because it must be larger than the value for epsilons=1, which is:
231 ((x*((BIG/mod)-1))/BIG - 1) * mod =
232 ((x*BIG/mod - x)/BIG -1) * mod =
233 ((x/mod - x/BIG) - 1) * mod =
234 (x - x/BIG*mod - mod)
235 (4) we can compute it without using division at all, if BIG is 1<<k,
236 as it simplifies to
237 (( x * M ) >> k ) * mod
238
239 So, assuming BIG>x, and is a power of two (say BIG=1<<64), we get an
240 expression, which is divisible by mod, and if we subtract it from x, we get
241 something in the range [0...,2mod). What is left is to compare against mod,
242 and subtract it if it is higher.
243 */
244
245 public:
246 fast_modulo_t() = default;
247 explicit fast_modulo_t(uint64_t mod)
248 : m_mod(mod), m_inv(precompute_inv(mod)) {}
249 explicit fast_modulo_t(uint64_t mod, uint64_t inv) : m_mod(mod), m_inv(inv) {}
250
251 /** Computes the value of x % mod. */
252 uint64_t compute(uint64_t x) const {
253 uint64_t hi;
254 (void)multiply_uint64(x, m_inv, hi);
255
256 const uint64_t guess = hi * m_mod;
257 const uint64_t rest = x - guess;
258
259 return rest - (m_mod <= rest) * m_mod;
260 }
261
262 /** Gets the precomputed value of inverse. */
263 uint64_t get_inverse() const { return m_inv; }
264
265 /** Gets the modulo value. */
266 uint64_t get_mod() const { return m_mod; }
267
268 /** Precomputes the inverse needed for fast modulo operations. */
269 static uint64_t precompute_inv(uint64_t mod) {
270 /* pedantic matter: for mod=1 -- you can remove it if you never plan to use
271 it for 1. */
272 if (mod == 1) {
273 /* According to equations we want M to be 1<<64, but this overflows
274 uint64_t, so, let's do the second best thing we can, which is 1<<64-1,
275 this means that our `guess` will be ((x<<64 - x) >> 64)*mod, which for
276 x=0, is 0 (good), and for x>0 is (x-1)*mod = (x-1)*1 = x-1, and then
277 rest=1, which is also good enough (<2*mod). */
278 return ~uint64_t{0};
279 } else {
280 return divide_128(1, 0, mod);
281 }
282 }
283
284 private:
285 uint64_t m_mod{0};
286 uint64_t m_inv{0};
287};
288
289/** A class that allows to atomically set new modulo value for fast modulo
290computations. */
292 public:
293 mt_fast_modulo_t() : m_data{0ULL, 0ULL} {}
294 explicit mt_fast_modulo_t(uint64_t mod)
295 : m_data{mod, fast_modulo_t::precompute_inv(mod)} {}
296 /* This class can be made copyable, but this requires additional constructors.
297 */
298
300 return m_data.read([](const data_t &stored_data) {
301 return fast_modulo_t{stored_data.m_mod.load(std::memory_order_relaxed),
302 stored_data.m_inv.load(std::memory_order_relaxed)};
303 });
304 }
305
306 void store(uint64_t new_mod) {
307 const fast_modulo_t new_fast_modulo{new_mod};
308 const auto inv = new_fast_modulo.get_inverse();
309 m_data.write([&](data_t &data) {
310 data.m_mod.store(new_mod, std::memory_order_relaxed);
311 data.m_inv.store(inv, std::memory_order_relaxed);
312 });
313 }
314
315 private:
316 struct data_t {
317 std::atomic<uint64_t> m_mod;
318 std::atomic<uint64_t> m_inv;
319 };
320
322};
323
324} // namespace ut
325
326static inline uint64_t operator%(uint64_t x, const ut::fast_modulo_t &fm) {
327 return fm.compute(x);
328}
329
330#endif
A utility class which, if inherited from, prevents the descendant class from being copied,...
Definition: ut0class_life_cycle.h:41
A class that allows to read value of variable of some type T atomically and allows the value to be ch...
Definition: ut0seq_lock.h:49
Allows to execute x % mod for a specified mod in a fast way, without using a slow operation of divisi...
Definition: ut0math.h:199
uint64_t get_inverse() const
Gets the precomputed value of inverse.
Definition: ut0math.h:263
uint64_t m_inv
Definition: ut0math.h:286
uint64_t compute(uint64_t x) const
Computes the value of x % mod.
Definition: ut0math.h:252
fast_modulo_t(uint64_t mod)
Definition: ut0math.h:247
static uint64_t precompute_inv(uint64_t mod)
Precomputes the inverse needed for fast modulo operations.
Definition: ut0math.h:269
uint64_t m_mod
Definition: ut0math.h:285
fast_modulo_t(uint64_t mod, uint64_t inv)
Definition: ut0math.h:249
uint64_t get_mod() const
Gets the modulo value.
Definition: ut0math.h:266
fast_modulo_t()=default
A class that allows to atomically set new modulo value for fast modulo computations.
Definition: ut0math.h:291
mt_fast_modulo_t()
Definition: ut0math.h:293
mt_fast_modulo_t(uint64_t mod)
Definition: ut0math.h:294
void store(uint64_t new_mod)
Definition: ut0math.h:306
Seq_lock< data_t > m_data
Definition: ut0math.h:321
fast_modulo_t load() const
Definition: ut0math.h:299
Definition: ut0tuple.h:57
constexpr uint64_t multiply_uint64_portable(uint64_t x, uint64_t y, uint64_t &hi)
Calculates the 128bit result of multiplication of the two specified 64bit integers.
Definition: ut0math.h:134
This file contains a set of libraries providing overloads for regular dynamic allocation routines whi...
Definition: aligned_alloc.h:48
constexpr T div_ceil(T numerator, T denominator)
Computes the result of division rounded towards positive infinity.
Definition: ut0math.h:83
static int countr_zero(uint64_t x)
Portable replacement for std::countr_zero(uint64_t) from C++20.
Definition: ut0math.h:47
uint64_t find_prime(uint64_t n)
Looks for a prime number slightly greater than the given argument.
Definition: ut0math.cc:36
static uint64_t multiply_uint64(uint64_t x, uint64_t y, uint64_t &hi)
Calculates the 128bit result of multiplication of the two specified 64bit integers.
Definition: ut0math.h:172
static uint64_t divide_128(uint64_t high, uint64_t low, uint64_t div)
Definition: ut0math.h:178
Definition: ut0math.h:316
std::atomic< uint64_t > m_mod
Definition: ut0math.h:317
std::atomic< uint64_t > m_inv
Definition: ut0math.h:318
Utilities related to class lifecycle.
Debug utilities for Innobase.
static uint64_t operator%(uint64_t x, const ut::fast_modulo_t &fm)
Definition: ut0math.h:326
Implements a sequential lock structure for non-locking atomic read/write operations on a complex stru...
int n
Definition: xcom_base.cc:509