MySQL 8.4.2
Source Code Documentation
ut0math.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2021, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ut0math.h
29 Math functions.
30
31 ***********************************************************************/
32
33#ifndef ut0math_h
34#define ut0math_h
35
36#include <atomic>
37#include <cstdint>
38#include "ut0class_life_cycle.h"
39#include "ut0dbg.h"
40#include "ut0seq_lock.h"
41
42namespace ut {
43
44/** Calculates the 128bit result of multiplication of the two specified 64bit
45integers. May use CPU native instructions for speed of standard uint64_t
46multiplication.
47@param[in] x First number to multiply.
48@param[in] y Second number to multiply.
49@param[out] hi A reference to 64bit integer that will store higher 64bits of the
50result.
51@return The lower 64bit of the result. */
52[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
53 uint64_t &hi);
54
/** Calculates the 64bit result of division of the specified 128bit integer by
the specified 64bit integer. The result must fit in 64bit or else the behavior
is undefined. Currently does not use native CPU instructions and can be quite
slow.
@param[in] high High 64bits of the number to divide.
@param[in] low Low 64bits of the number to divide.
@param[in] div The number to divide by.
@return The quotient, which by the above requirement fits in 64 bits. */
62[[nodiscard]] static inline uint64_t divide_128(uint64_t high, uint64_t low,
63 uint64_t div);
64class fast_modulo_t;
65
66/** Looks for a prime number slightly greater than the given argument.
67The prime is chosen so that it is not near any power of 2.
68@param[in] n positive number > 100
69@return prime */
70[[nodiscard]] uint64_t find_prime(uint64_t n);
71
72namespace detail {
/** Calculates the 128bit result of multiplication of the two specified 64bit
integers, using only 64bit arithmetic on the 32bit halves of the operands, so
it does not depend on any compiler extension or CPU intrinsic.
@param[in] x First number to multiply.
@param[in] y Second number to multiply.
@param[out] hi A reference to 64bit integer that will store higher 64bits of the
result.
@return The lower 64bit of the result. */
[[nodiscard]] constexpr uint64_t multiply_uint64_portable(uint64_t x,
                                                          uint64_t y,
                                                          uint64_t &hi) {
  const uint32_t x_hi = static_cast<uint32_t>(x >> 32);
  const uint32_t x_lo = static_cast<uint32_t>(x);
  const uint32_t y_hi = static_cast<uint32_t>(y >> 32);
  const uint32_t y_lo = static_cast<uint32_t>(y);

  /* x * y = (x_hi * y_hi << 64) + ((x_hi * y_lo + x_lo * y_hi) << 32) +
  x_lo * y_lo. Each partial product below is widened to 64 bits first. */
  const uint64_t hi_lo = static_cast<uint64_t>(x_hi) * y_lo;
  const uint64_t low = static_cast<uint64_t>(x_lo) * y_lo;

  /* This will not overflow: a product of two 32bit values is at most
  (2^32 - 1)^2 = 2^64 - 2 * 2^32 + 1, so there is still room for two 32bit
  integers (at most 2 * (2^32 - 1) in total) to be added, which gives at most
  2^64 - 1. */
  const uint64_t mid = (low >> 32) + static_cast<uint64_t>(x_lo) * y_hi +
                       static_cast<uint32_t>(hi_lo);

  /* The same bound applies here: one 32bit x 32bit product plus two values
  that are each below 2^32. */
  hi = (mid >> 32) + static_cast<uint64_t>(x_hi) * y_hi + (hi_lo >> 32);

  /* Lower 32 bits come from `low`, the upper 32 bits from the low half of
  `mid`; the two parts do not overlap, so the addition cannot carry. */
  return static_cast<uint32_t>(low) + (mid << 32);
}
98} // namespace detail
99
#if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
/* MSVC targeting x64 supports native uint64_t -> uint128_t multiplication via
the _umul128 intrinsic.
NOTE(review): this #include sits textually inside namespace ut; consider
hoisting it to the top-of-file include block. */
#include <intrin.h>
#pragma intrinsic(_umul128)
/** Calculates the 128bit result of multiplication of the two specified 64bit
integers using the MSVC _umul128 intrinsic.
@param[in] x First number to multiply.
@param[in] y Second number to multiply.
@param[out] hi Receives the higher 64bits of the result.
@return The lower 64bit of the result. */
[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
                                                     uint64_t &hi) {
  return _umul128(x, y, &hi);
}
#elif defined(__SIZEOF_INT128__)
/* Compiler supports 128-bit values (GCC/Clang) */

/** Calculates the 128bit result of multiplication of the two specified 64bit
integers using the compiler's built-in 128bit integer type.
@param[in] x First number to multiply.
@param[in] y Second number to multiply.
@param[out] hi Receives the higher 64bits of the result.
@return The lower 64bit of the result. */
[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
                                                     uint64_t &hi) {
  /* Widen one operand first so that the multiplication itself is 128bit. */
  const unsigned __int128 product = static_cast<unsigned __int128>(x) * y;
  hi = static_cast<uint64_t>(product >> 64);
  return static_cast<uint64_t>(product);
}
#else
/** Calculates the 128bit result of multiplication of the two specified 64bit
integers. No native 128bit support was detected, so this forwards to the
portable implementation.
@param[in] x First number to multiply.
@param[in] y Second number to multiply.
@param[out] hi Receives the higher 64bits of the result.
@return The lower 64bit of the result. */
[[nodiscard]] static inline uint64_t multiply_uint64(uint64_t x, uint64_t y,
                                                     uint64_t &hi) {
  return detail::multiply_uint64_portable(x, y, hi);
}
#endif
123
/** Calculates the 64bit quotient of the 128bit unsigned integer (high:low)
divided by a 64bit unsigned integer, using bit-by-bit long division: for every
quotient bit, from the most significant down, the divisor shifted left by that
many bits is subtracted from the remaining dividend whenever it still fits.
No native CPU instructions are used, so this takes 64 iterations.
The quotient must fit in 64 bits (equivalently, high < div); otherwise the
returned value is not the true quotient.
@param[in] high High 64bits of the number to divide.
@param[in] low Low 64bits of the number to divide.
@param[in] div The number to divide by.
@return The quotient. */
[[nodiscard]] static inline uint64_t divide_128(uint64_t high, uint64_t low,
                                                uint64_t div) {
  uint64_t quotient = 0;
  for (int bit = 63; bit >= 0; --bit) {
    /* (shifted_hi:shifted_lo) is the 128bit value of div << bit. For bit == 0
    the high part is 0; it is special-cased because shifting a 64bit value by
    64 is not allowed. */
    const uint64_t shifted_hi = bit != 0 ? (div >> (64 - bit)) : 0;
    const uint64_t shifted_lo = div << bit;

    /* 128bit comparison: does the shifted divisor fit in what is left? */
    if (shifted_hi < high || (shifted_hi == high && shifted_lo <= low)) {
      /* 128bit subtraction, propagating the borrow from the low half. */
      high -= shifted_hi;
      if (low < shifted_lo) {
        high--;
      }
      low -= shifted_lo;
      quotient += uint64_t{1} << bit;
    }
  }
  return quotient;
}
141
142/** Allows to execute x % mod for a specified mod in a fast way, without using a
143slow operation of division. The additional cost is hidden in constructor to
144preprocess the mod constant. */
146 /* Idea behind this implementation is following: (division sign in all
147 equations below is to be treated as mathematical division on reals)
148
149 x % mod = x - floor(x/mod)*mod
150
151 and...
152
153 x / mod = x * 1/mod = (x * (BIG/mod)) /BIG
154
155 and..
156
157 floor(x/mod) = x / mod - epsilon, where 0<=epsilon<1
158
159 Now, lets define:
160
161 M = floor(BIG/mod)
162
163 And take a look at the value of following expression:
164
165 floor( x*M / BIG) * mod =
166
167 floor(x * floor(BIG/mod) / BIG) * mod =
168 floor(x * ((BIG/mod)-epsilon1) / BIG) * mod =
169 ((x*((BIG/mod)-epsilon1)/BIG - epsilon2) * mod
170
171 This sure looks ugly, but it has interesting properties:
172 (1) is divisible by mod, which you can see, because it has a form (...)*
173 mod
174 (2) is smaller or equal to x, which you can see by setting epsilons to 0
175 (3) assuming BIG>x, the expression is strictly larger than x - 2*mod,
176 because it must be larger than the value for epsilons=1, which is:
177 ((x*((BIG/mod)-1))/BIG - 1) * mod =
178 ((x*BIG/mod - x)/BIG -1) * mod =
179 ((x/mod - x/BIG) - 1) * mod =
180 (x - x/BIG*mod - mod)
181 (4) we can compute it without using division at all, if BIG is 1<<k,
182 as it simplifies to
183 (( x * M ) >> k ) * mod
184
185 So, assuming BIG>x, and is a power of two (say BIG=1<<64), we get an
186 expression, which is divisible by mod, and if we subtract it from x, we get
187 something in the range [0...,2mod). What is left is to compare against mod,
188 and subtract it if it is higher.
189 */
190
191 public:
192 fast_modulo_t() = default;
193 explicit fast_modulo_t(uint64_t mod)
194 : m_mod(mod), m_inv(precompute_inv(mod)) {}
195 explicit fast_modulo_t(uint64_t mod, uint64_t inv) : m_mod(mod), m_inv(inv) {}
196
197 /** Computes the value of x % mod. */
198 uint64_t compute(uint64_t x) const {
199 uint64_t hi;
200 (void)multiply_uint64(x, m_inv, hi);
201
202 const uint64_t guess = hi * m_mod;
203 const uint64_t rest = x - guess;
204
205 return rest - (m_mod <= rest) * m_mod;
206 }
207
208 /** Gets the precomputed value of inverse. */
209 uint64_t get_inverse() const { return m_inv; }
210
211 /** Gets the modulo value. */
212 uint64_t get_mod() const { return m_mod; }
213
214 /** Precomputes the inverse needed for fast modulo operations. */
215 static uint64_t precompute_inv(uint64_t mod) {
216 /* pedantic matter: for mod=1 -- you can remove it if you never plan to use
217 it for 1. */
218 if (mod == 1) {
219 /* According to equations we want M to be 1<<64, but this overflows
220 uint64_t, so, let's do the second best thing we can, which is 1<<64-1,
221 this means that our `guess` will be ((x<<64 - x) >> 64)*mod, which for
222 x=0, is 0 (good), and for x>0 is (x-1)*mod = (x-1)*1 = x-1, and then
223 rest=1, which is also good enough (<2*mod). */
224 return ~uint64_t{0};
225 } else {
226 return divide_128(1, 0, mod);
227 }
228 }
229
230 private:
231 uint64_t m_mod{0};
232 uint64_t m_inv{0};
233};
234
235/** A class that allows to atomically set new modulo value for fast modulo
236computations. */
238 public:
239 mt_fast_modulo_t() : m_data{0ULL, 0ULL} {}
240 explicit mt_fast_modulo_t(uint64_t mod)
241 : m_data{mod, fast_modulo_t::precompute_inv(mod)} {}
242 /* This class can be made copyable, but this requires additional constructors.
243 */
244
246 return m_data.read([](const data_t &stored_data) {
247 return fast_modulo_t{stored_data.m_mod.load(std::memory_order_relaxed),
248 stored_data.m_inv.load(std::memory_order_relaxed)};
249 });
250 }
251
252 void store(uint64_t new_mod) {
253 const fast_modulo_t new_fast_modulo{new_mod};
254 const auto inv = new_fast_modulo.get_inverse();
255 m_data.write([&](data_t &data) {
256 data.m_mod.store(new_mod, std::memory_order_relaxed);
257 data.m_inv.store(inv, std::memory_order_relaxed);
258 });
259 }
260
261 private:
262 struct data_t {
263 std::atomic<uint64_t> m_mod;
264 std::atomic<uint64_t> m_inv;
265 };
266
268};
269
270} // namespace ut
271
272static inline uint64_t operator%(uint64_t x, const ut::fast_modulo_t &fm) {
273 return fm.compute(x);
274}
275
276#endif
A utility class which, if inherited from, prevents the descendant class from being copied,...
Definition: ut0class_life_cycle.h:41
A class that allows to read value of variable of some type T atomically and allows the value to be ch...
Definition: ut0seq_lock.h:49
Allows to execute x % mod for a specified mod in a fast way, without using a slow operation of divisi...
Definition: ut0math.h:145
uint64_t get_inverse() const
Gets the precomputed value of inverse.
Definition: ut0math.h:209
uint64_t m_inv
Definition: ut0math.h:232
uint64_t compute(uint64_t x) const
Computes the value of x % mod.
Definition: ut0math.h:198
fast_modulo_t(uint64_t mod)
Definition: ut0math.h:193
static uint64_t precompute_inv(uint64_t mod)
Precomputes the inverse needed for fast modulo operations.
Definition: ut0math.h:215
uint64_t m_mod
Definition: ut0math.h:231
fast_modulo_t(uint64_t mod, uint64_t inv)
Definition: ut0math.h:195
uint64_t get_mod() const
Gets the modulo value.
Definition: ut0math.h:212
fast_modulo_t()=default
A class that allows to atomically set new modulo value for fast modulo computations.
Definition: ut0math.h:237
mt_fast_modulo_t()
Definition: ut0math.h:239
mt_fast_modulo_t(uint64_t mod)
Definition: ut0math.h:240
fast_modulo_t load()
Definition: ut0math.h:245
void store(uint64_t new_mod)
Definition: ut0math.h:252
Seq_lock< data_t > m_data
Definition: ut0math.h:267
Definition: ut0tuple.h:57
constexpr uint64_t multiply_uint64_portable(uint64_t x, uint64_t y, uint64_t &hi)
Calculates the 128bit result of multiplication of the two specified 64bit integers.
Definition: ut0math.h:80
This file contains a set of libraries providing overloads for regular dynamic allocation routines whi...
Definition: aligned_alloc.h:48
uint64_t find_prime(uint64_t n)
Looks for a prime number slightly greater than the given argument.
Definition: ut0math.cc:36
static uint64_t multiply_uint64(uint64_t x, uint64_t y, uint64_t &hi)
Calculates the 128bit result of multiplication of the two specified 64bit integers.
Definition: ut0math.h:118
static uint64_t divide_128(uint64_t high, uint64_t low, uint64_t div)
Definition: ut0math.h:124
Definition: ut0math.h:262
std::atomic< uint64_t > m_mod
Definition: ut0math.h:263
std::atomic< uint64_t > m_inv
Definition: ut0math.h:264
Utilities related to class lifecycle.
Debug utilities for Innobase.
static uint64_t operator%(uint64_t x, const ut::fast_modulo_t &fm)
Definition: ut0math.h:272
Implements a sequential lock structure for non-locking atomic read/write operations on a complex stru...
int n
Definition: xcom_base.cc:509