MySQL 9.1.0
Source Code Documentation
ut0math.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2021, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ut0math.h
29 Math functions.
30
31 ***********************************************************************/
32
33#ifndef ut0math_h
34#define ut0math_h
35
#include <atomic>
#include <cstdint>
#include <type_traits>
#include "ut0class_life_cycle.h"
#include "ut0dbg.h"
#include "ut0seq_lock.h"
41
42namespace ut {
43
/** Computes the result of division rounded towards positive infinity.
The same preconditions as for the built-in division apply: the denominator
must not be zero and the quotient must be representable in T.
@tparam T An integral type (enforced with a static_assert).
@param[in] numerator The number you want to be divided
@param[in] denominator The number you want to divide by
@return ceil(numerator/denominator). */
template <typename T>
[[nodiscard]] constexpr T div_ceil(T numerator, T denominator) {
  static_assert(std::is_integral_v<T>, "div_ceil<T> needs integral T");
  /* see https://gist.github.com/Eisenwave/2a7d7a4e74e99bbb513984107a6c63ef
  for list of common pitfalls, and this beautiful solution which compiles to
  - branchless code with one division operation for unsigned ints,
  - branchless (but longer) code with one division operation for signed ints,
  - branchless code with just shifts and adds for constant d=constexpr 2^k,
  - branchless code with multiplication instead of division for constexpr d
  All that correctly handling negative numerators, denominators, and values
  close to or equal to the max() or min(). */

  /* The built-in division truncates towards zero. That already equals the
  ceiling when the exact quotient is negative, so the result only has to be
  bumped by one when the quotient is not negative and there is a remainder. */
  const bool quotient_not_negative{(numerator < 0) == (denominator < 0)};
  return numerator / denominator +
         (quotient_not_negative && numerator % denominator != 0);
}
63
64/** Calculates the 128bit result of multiplication of the two specified 64bit
65integers. May use CPU native instructions for speed of standard uint64_t
66multiplication.
67@param[in] x First number to multiply.
68@param[in] y Second number to multiply.
69@param[out] hi A reference to 64bit integer that will store higher 64bits of the
70result.
71@return The lower 64bit of the result. */
72[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
73 uint64_t &hi);
74
75/** Calculates the 64bit result of division of the specified 128bit integer by the
76specified 64bit integer. The result must fit in 64bit or else the behavior is
77undefined. Currently does not use native CPU instructions and can be quite slow.
78@param[in] high High 64bits of the number to divide.
79@param[in] low Low 64bits of the number to divide.
80@param[in] div The number to divide by.
81@return The quotient of the division, rounded down. */
82[[nodiscard]] static inline uint64_t divide_128(uint64_t high, uint64_t low,
83 uint64_t div);
84class fast_modulo_t;
85
86/** Looks for a prime number slightly greater than the given argument.
87The prime is chosen so that it is not near any power of 2.
88@param[in] n positive number > 100
89@return prime */
90[[nodiscard]] uint64_t find_prime(uint64_t n);
91
92namespace detail {
/** Calculates the 128bit product of two 64bit integers using only 64bit
arithmetic (schoolbook multiplication on 32bit halves).
@param[in] x First number to multiply.
@param[in] y Second number to multiply.
@param[out] hi Receives the higher 64 bits of the product.
@return The lower 64 bits of the product. */
[[nodiscard]] constexpr uint64_t multiply_uint64_portable(uint64_t x,
                                                          uint64_t y,
                                                          uint64_t &hi) {
  constexpr uint64_t LOW_HALF_MASK = 0xFFFFFFFFULL;

  /* Split both operands into 32bit halves, kept in 64bit variables so that
  the partial products below are computed without truncation. */
  const uint64_t x_high = x >> 32;
  const uint64_t x_low = x & LOW_HALF_MASK;
  const uint64_t y_high = y >> 32;
  const uint64_t y_low = y & LOW_HALF_MASK;

  /* The four partial products. */
  const uint64_t low_low = x_low * y_low;
  const uint64_t low_high = x_low * y_high;
  const uint64_t high_low = x_high * y_low;
  const uint64_t high_high = x_high * y_high;

  /* Bits 32..95 of the product. This cannot overflow: a partial product is at
  most (2^32 - 1)^2 = 2^64 - 2 * 2^32 + 1, which leaves room for adding two
  more values that are smaller than 2^32. */
  const uint64_t middle =
      (low_low >> 32) + low_high + (high_low & LOW_HALF_MASK);

  hi = (middle >> 32) + high_high + (high_low >> 32);
  return (low_low & LOW_HALF_MASK) + (middle << 32);
}
118} // namespace detail
119
#if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
/* MSVC targeting x64 offers a native 64bit x 64bit -> 128bit multiplication
intrinsic.
NOTE(review): this #include is placed inside namespace ut - confirm that this
is intended. */
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif

/** Calculates the 128bit product of two 64bit integers, using the fastest
facility available on the platform.
@param[in] x First number to multiply.
@param[in] y Second number to multiply.
@param[out] hi Receives the higher 64 bits of the product.
@return The lower 64 bits of the product. */
[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
                                                     uint64_t &hi) {
#if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
  /* The intrinsic returns the low half and writes the high half to *(&hi). */
  return _umul128(x, y, &hi);
#elif defined(__SIZEOF_INT128__)
  /* Compiler supports 128-bit values (GCC/Clang). */
  const unsigned __int128 product = static_cast<unsigned __int128>(x) * y;
  hi = static_cast<uint64_t>(product >> 64);
  return static_cast<uint64_t>(product);
#else
  /* No native support: fall back to the portable implementation. */
  return detail::multiply_uint64_portable(x, y, hi);
#endif
}
143
/** Divides the 128bit number (high:low) by a 64bit number using bit-by-bit
long division. The quotient is expected to fit in 64 bits.
@param[in] high High 64bits of the number to divide.
@param[in] low Low 64bits of the number to divide.
@param[in] div The number to divide by.
@return The quotient of the division, rounded down. */
[[nodiscard]] static inline uint64_t divide_128(uint64_t high, uint64_t low,
                                                uint64_t div) {
  uint64_t quotient = 0;
  /* Try quotient bits from the most significant one down to bit 0. */
  int shift = 64;
  while (shift-- > 0) {
    /* 128bit value of (div << shift), kept as two 64bit halves. A shift by 64
    is not allowed, so the high half for shift == 0 is special-cased. */
    const uint64_t shifted_hi = (shift != 0) ? (div >> (64 - shift)) : 0;
    const uint64_t shifted_lo = div << shift;

    /* Does the shifted divisor still fit into what is left of the dividend? */
    const bool fits =
        shifted_hi < high || (shifted_hi == high && shifted_lo <= low);
    if (!fits) {
      continue;
    }

    /* 128bit subtraction: propagate the borrow from the low half. */
    const uint64_t borrow = (low < shifted_lo) ? 1 : 0;
    high -= shifted_hi + borrow;
    low -= shifted_lo;
    quotient |= uint64_t{1} << shift;
  }
  return quotient;
}
161
162/** Allows to execute x % mod for a specified mod in a fast way, without using a
163slow operation of division. The additional cost is hidden in constructor to
164preprocess the mod constant. */
166 /* Idea behind this implementation is following: (division sign in all
167 equations below is to be treated as mathematical division on reals)
168
169 x % mod = x - floor(x/mod)*mod
170
171 and...
172
173 x / mod = x * 1/mod = (x * (BIG/mod)) /BIG
174
175 and..
176
177 floor(x/mod) = x / mod - epsilon, where 0<=epsilon<1
178
179 Now, let's define:
180
181 M = floor(BIG/mod)
182
183 And take a look at the value of following expression:
184
185 floor( x*M / BIG) * mod =
186
187 floor(x * floor(BIG/mod) / BIG) * mod =
188 floor(x * ((BIG/mod)-epsilon1) / BIG) * mod =
189 (x*((BIG/mod)-epsilon1)/BIG - epsilon2) * mod
190
191 This sure looks ugly, but it has interesting properties:
192 (1) is divisible by mod, which you can see, because it has a form (...)*
193 mod
194 (2) is smaller or equal to x, which you can see by setting epsilons to 0
195 (3) assuming BIG>x, the expression is strictly larger than x - 2*mod,
196 because it must be larger than the value for epsilons=1, which is:
197 ((x*((BIG/mod)-1))/BIG - 1) * mod =
198 ((x*BIG/mod - x)/BIG -1) * mod =
199 ((x/mod - x/BIG) - 1) * mod =
200 (x - x/BIG*mod - mod)
201 (4) we can compute it without using division at all, if BIG is 1<<k,
202 as it simplifies to
203 (( x * M ) >> k ) * mod
204
205 So, assuming BIG>x, and that BIG is a power of two (say BIG=1<<64), we get an
206 expression, which is divisible by mod, and if we subtract it from x, we get
207 something in the range [0, 2*mod). What is left is to compare against mod,
208 and subtract mod if the value is greater than or equal to it.
209 */
210
211 public:
212 fast_modulo_t() = default;
213 explicit fast_modulo_t(uint64_t mod)
214 : m_mod(mod), m_inv(precompute_inv(mod)) {}
215 explicit fast_modulo_t(uint64_t mod, uint64_t inv) : m_mod(mod), m_inv(inv) {}
216
217 /** Computes the value of x % mod. */
218 uint64_t compute(uint64_t x) const {
219 uint64_t hi;
220 (void)multiply_uint64(x, m_inv, hi);
221
222 const uint64_t guess = hi * m_mod;
223 const uint64_t rest = x - guess;
224
225 return rest - (m_mod <= rest) * m_mod;
226 }
227
228 /** Gets the precomputed value of inverse. */
229 uint64_t get_inverse() const { return m_inv; }
230
231 /** Gets the modulo value. */
232 uint64_t get_mod() const { return m_mod; }
233
234 /** Precomputes the inverse needed for fast modulo operations. */
235 static uint64_t precompute_inv(uint64_t mod) {
236 /* pedantic matter: for mod=1 -- you can remove it if you never plan to use
237 it for 1. */
238 if (mod == 1) {
239 /* According to equations we want M to be 1<<64, but this overflows
240 uint64_t, so, let's do the second best thing we can, which is 1<<64-1,
241 this means that our `guess` will be ((x<<64 - x) >> 64)*mod, which for
242 x=0, is 0 (good), and for x>0 is (x-1)*mod = (x-1)*1 = x-1, and then
243 rest=1, which is also good enough (<2*mod). */
244 return ~uint64_t{0};
245 } else {
246 return divide_128(1, 0, mod);
247 }
248 }
249
250 private:
251 uint64_t m_mod{0};
252 uint64_t m_inv{0};
253};
254
255/** A class that allows to atomically set new modulo value for fast modulo
256computations. */
258 public:
259 mt_fast_modulo_t() : m_data{0ULL, 0ULL} {}
260 explicit mt_fast_modulo_t(uint64_t mod)
261 : m_data{mod, fast_modulo_t::precompute_inv(mod)} {}
262 /* This class can be made copyable, but this requires additional constructors.
263 */
264
266 return m_data.read([](const data_t &stored_data) {
267 return fast_modulo_t{stored_data.m_mod.load(std::memory_order_relaxed),
268 stored_data.m_inv.load(std::memory_order_relaxed)};
269 });
270 }
271
272 void store(uint64_t new_mod) {
273 const fast_modulo_t new_fast_modulo{new_mod};
274 const auto inv = new_fast_modulo.get_inverse();
275 m_data.write([&](data_t &data) {
276 data.m_mod.store(new_mod, std::memory_order_relaxed);
277 data.m_inv.store(inv, std::memory_order_relaxed);
278 });
279 }
280
281 private:
282 struct data_t {
283 std::atomic<uint64_t> m_mod;
284 std::atomic<uint64_t> m_inv;
285 };
286
288};
289
290} // namespace ut
291
292static inline uint64_t operator%(uint64_t x, const ut::fast_modulo_t &fm) {
293 return fm.compute(x);
294}
295
296#endif
A utility class which, if inherited from, prevents the descendant class from being copied,...
Definition: ut0class_life_cycle.h:41
A class that allows to read value of variable of some type T atomically and allows the value to be ch...
Definition: ut0seq_lock.h:49
Allows to execute x % mod for a specified mod in a fast way, without using a slow operation of divisi...
Definition: ut0math.h:165
uint64_t get_inverse() const
Gets the precomputed value of inverse.
Definition: ut0math.h:229
uint64_t m_inv
Definition: ut0math.h:252
uint64_t compute(uint64_t x) const
Computes the value of x % mod.
Definition: ut0math.h:218
fast_modulo_t(uint64_t mod)
Definition: ut0math.h:213
static uint64_t precompute_inv(uint64_t mod)
Precomputes the inverse needed for fast modulo operations.
Definition: ut0math.h:235
uint64_t m_mod
Definition: ut0math.h:251
fast_modulo_t(uint64_t mod, uint64_t inv)
Definition: ut0math.h:215
uint64_t get_mod() const
Gets the modulo value.
Definition: ut0math.h:232
fast_modulo_t()=default
A class that allows to atomically set new modulo value for fast modulo computations.
Definition: ut0math.h:257
mt_fast_modulo_t()
Definition: ut0math.h:259
mt_fast_modulo_t(uint64_t mod)
Definition: ut0math.h:260
void store(uint64_t new_mod)
Definition: ut0math.h:272
Seq_lock< data_t > m_data
Definition: ut0math.h:287
fast_modulo_t load() const
Definition: ut0math.h:265
Definition: ut0tuple.h:57
constexpr uint64_t multiply_uint64_portable(uint64_t x, uint64_t y, uint64_t &hi)
Calculates the 128bit result of multiplication of the two specified 64bit integers.
Definition: ut0math.h:100
This file contains a set of libraries providing overloads for regular dynamic allocation routines whi...
Definition: aligned_alloc.h:48
constexpr T div_ceil(T numerator, T denominator)
Computes the result of division rounded towards positive infinity.
Definition: ut0math.h:49
uint64_t find_prime(uint64_t n)
Looks for a prime number slightly greater than the given argument.
Definition: ut0math.cc:36
static uint64_t multiply_uint64(uint64_t x, uint64_t y, uint64_t &hi)
Calculates the 128bit result of multiplication of the two specified 64bit integers.
Definition: ut0math.h:138
static uint64_t divide_128(uint64_t high, uint64_t low, uint64_t div)
Definition: ut0math.h:144
Definition: ut0math.h:282
std::atomic< uint64_t > m_mod
Definition: ut0math.h:283
std::atomic< uint64_t > m_inv
Definition: ut0math.h:284
Utilities related to class lifecycle.
Debug utilities for Innobase.
static uint64_t operator%(uint64_t x, const ut::fast_modulo_t &fm)
Definition: ut0math.h:292
Implements a sequential lock structure for non-locking atomic read/write operations on a complex stru...
int n
Definition: xcom_base.cc:509