MySQL 8.0.40
Source Code Documentation
ut0bitset.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 1994, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ut0bitset.h
29 Utilities for bitset operations
30
31 Created 11/05/2018 Bin Su
32 ***********************************************************************/
33
34#ifndef ut0bitset_h
35#define ut0bitset_h
36#include <cstdint>
37#include <cstring>
38#include <limits>
39#include "univ.i"
40#include "ut0dbg.h"
41#include "ut0math.h"
42
43/** A simple bitset wrapper class, which lets you access an existing range of
44bytes (not owned by it!) as if it was a <tt>std::bitset</tt> or
45<tt>std::vector<bool></tt>.
46NOTE: Because it is a wrapper, its semantics are similar to <tt>std::span</tt>.
47For example <tt>const Bitset<></tt> can still let someone modify the bits via
48<tt>set()</tt> or <tt>reset()</tt>. If you want to prevent someone from editing
49the buffer, you'd need <tt>Bitset&lt;const byte></tt>. For same reason,
50<tt>bitset1=bitset2</tt> will just repoint <tt>bitset1</tt> to the same range of
51bytes as <tt>bitset2</tt> without copying any bits. If you want to copy the bits
52use <tt>bitset1.copy_from(bitset2.bitset())</tt> instead. */
53template <typename B = byte>
54class Bitset {
55 static_assert(std::is_same_v<std::decay_t<B>, byte>,
56 "Bitset<B> requires B to be a byte or const byte");
57
58 public:
59 /** Constructor */
62
63 /** Returns a wrapper around [pos,pos+len) fragment of the buffer, where pos
64 and len are measured in bytes
65 @param[in] byte_offset position of first byte of selected slice
66 @param[in] bytes_count length of the slice in bytes
67 @return Bitset wrapping the sub-array */
68 Bitset bytes_subspan(size_t byte_offset, size_t bytes_count) const {
69 ut_ad(byte_offset + bytes_count <= m_size_bytes);
70 return Bitset(m_data + byte_offset, bytes_count);
71 }
72 /** Returns a wrapper around fragment of the buffer starting at pos, where pos
73 is measured in bytes
74 @param[in] byte_offset position of first byte of selected slice
75 @return Bitset wrapping the sub-array */
76 Bitset bytes_subspan(size_t byte_offset) const {
77 ut_ad(byte_offset <= m_size_bytes);
78 return bytes_subspan(byte_offset, m_size_bytes - byte_offset);
79 }
80
81 /** Copy a bits from other buffer into this one
82 @param[in] bitset byte array to bits copy from */
83 void copy_from(const byte *bitset) const {
84 memcpy(m_data, bitset, m_size_bytes);
85 }
86
87 /** Set the specified bit to the value 'bit'
88 @param[in] pos specified bit
89 @param[in] v true or false */
90 void set(size_t pos, bool v = true) const {
91 ut_ad(pos / 8 < m_size_bytes);
92 m_data[pos / 8] &= ~(0x1 << (pos & 0x7));
93 m_data[pos / 8] |= (static_cast<byte>(v) << (pos & 0x7));
94 }
95
96 /** Set all bits to true */
97 void set() const { memset(m_data, 0xFF, m_size_bytes); }
98
99 /** Set all bits to false */
100 void reset() const { memset(m_data, 0, m_size_bytes); }
101
102 /** Sets the specified bit to false
103 @param[in] pos specified bit */
104 void reset(size_t pos) const { set(pos, false); }
105
106 /** Converts the content of the bitset to an uint64_t value, such that
107 (value>>i&1) if and only if test(i).
108 The m_size must be at most 8 bytes.
109 @return content of the bitset as uint64_t mapping i-th bit to 2^i */
110 uint64_t to_uint64() const {
111 ut_a(m_size_bytes <= 8);
112 byte bytes[8] = {};
113 memcpy(bytes, m_data, m_size_bytes);
114 return to_uint64(bytes);
115 }
116 /** Value used by find_set to indicate it could not find a bit set to 1.
117 It is guaranteed to be larger than the size of the vector. */
118 constexpr static size_t NOT_FOUND = std::numeric_limits<size_t>::max();
119
120 /** Finds the smallest position which is set and is not smaller than start_pos
121 @param[in] start_pos The position from which to start the search.
122 @return Smallest pos for which test(pos)==true and start_pos<=pos. In case
123 there's no such pos, returns NOT_FOUND */
124 size_t find_set(size_t start_pos) const {
125 /* The reason this function is so complicated is because it is meant to be
126 fast for long sparse bitsets, so it's main part is to iterate over whole
127 uint64_t words, ignoring all that are equal to 0. Alas, in general m_bitset
128 doesn't have to be aligned to word boundary, neither m_bitse + m_size must
129 end at word boundary, worse still m_size could be below 8. Thus we consider
130 following cases:
131 a) start_pos out of bounds -> return NOT_FOUND
132 b) m_size <=8 -> convert the few bytes into uint64_t and use countr_zero
133 c) m_bitset aligned -> iter over whole words, handle unfinished word
134 recursively (a or b)
135 d) unaligned m_bitset -> handle partial word recursively (b), and the
136 aligned rest recursively (a, b or c).
137 Note that in most important usages of this class m_bitset is aligned. */
138 if (m_size_bytes * 8 <= start_pos) {
139 return NOT_FOUND;
140 }
141 if (m_size_bytes <= 8) {
142 const uint64_t all = to_uint64();
143 const uint64_t earlier = (uint64_t{1} << start_pos) - 1;
144 const uint64_t unseen = all & ~earlier;
145 if (unseen) {
146 return ut::countr_zero(unseen);
147 }
148 return NOT_FOUND;
149 }
150 const auto start_addr = reinterpret_cast<uintptr_t>(m_data);
151 const size_t start_word_byte_idx =
152 ut::div_ceil(start_addr, uintptr_t{8}) * 8 - start_addr;
153 const auto translate_result = [&start_pos, this](size_t offset) {
154 auto found = bytes_subspan(offset / 8).find_set(start_pos - offset);
155 return found == NOT_FOUND ? found : found + offset;
156 };
157 if (start_word_byte_idx == 0) {
158 // the middle of the m_bitset consists of uint64_t elements
159 auto *words = reinterpret_cast<const uint64_t *>(m_data);
160 size_t word_idx = start_pos / 64;
161 const auto full_words_count = m_size_bytes / 8;
162 if (word_idx < full_words_count) {
163 const uint64_t earlier = (uint64_t{1} << start_pos % 64) - 1;
164 const uint64_t unseen = to_uint64(m_data + word_idx * 8) & ~earlier;
165 if (unseen) {
166 return word_idx * 64 + ut::countr_zero(unseen);
167 }
168 // otherwise find first non-empty word
169 ++word_idx;
170 while (word_idx < full_words_count) {
171 if (words[word_idx]) {
172 return word_idx * 64 +
173 ut::countr_zero(to_uint64(m_data + word_idx * 8));
174 }
175 ++word_idx;
176 }
177 start_pos = full_words_count * 64;
178 }
179 return translate_result(full_words_count * 64);
180 }
181 // this only occurs for m_bitset not aligned to 64-bit
182 if (start_pos < start_word_byte_idx * 8) {
183 const auto found =
184 bytes_subspan(0, start_word_byte_idx).find_set(start_pos);
185 if (found < NOT_FOUND) {
186 return found;
187 }
188 start_pos = start_word_byte_idx * 8;
189 }
190 return translate_result(start_word_byte_idx * 8);
191 }
192
193 /** Test if the specified bit is set or not
194 @param[in] pos the specified bit
195 @return True if this bit is set, otherwise false */
196 bool test(size_t pos) const {
197 ut_ad(pos / 8 < m_size_bytes);
198 return ((m_data[pos / 8] >> (pos & 0x7)) & 0x1);
199 }
200
201 /** Get the size of current bitset in bytes
202 @return the size of the bitset */
203 size_t size_bytes() const { return (m_size_bytes); }
204
205 /** Get the bitset's bytes buffer
206 @return current bitset */
207 B *data() const { return (m_data); }
208
209 private:
210 /** Converts 8 bytes to uint64_t value, such that
211 (value>>i&1) equals the i-th bit, i.e. (bytes[i/8]>>i%8 & 1).
212 For example, the returned value equals bytes[0] modulo 256.
213 @param[in] bytes the bytes to convert
214 @return uint64_t created by concatenating the bytes in the right order:
215 on Little-Endian it's an identity, on Big-Endian it's std::byteswap. */
216 static constexpr uint64_t to_uint64(const byte *bytes) {
217 /* This pattern this gets recognized by gcc as identity on Little-Endian.
218 The benefit of writing it this way is not only that it works on Big-Endian,
219 but also that it doesn't require the address to be aligned. */
220 return ((uint64_t)(bytes[7]) << 7 * 8) | ((uint64_t)(bytes[6]) << 6 * 8) |
221 ((uint64_t)(bytes[5]) << 5 * 8) | ((uint64_t)(bytes[4]) << 4 * 8) |
222 ((uint64_t)(bytes[3]) << 3 * 8) | ((uint64_t)(bytes[2]) << 2 * 8) |
223 ((uint64_t)(bytes[1]) << 1 * 8) | ((uint64_t)(bytes[0]) << 0 * 8);
224 }
225
226 /** The buffer containing the bitmap. First bit is the lowest bit of the first
227 byte of this buffer. */
229
230 /** The length of the buffer containing the bitmap in bytes. Number of bits is
231 8 times larger than this */
233};
234
235#endif
A simple bitset wrapper class, which lets you access an existing range of bytes (not owned by it!...
Definition: ut0bitset.h:54
uint64_t to_uint64() const
Converts the content of the bitset to an uint64_t value, such that (value>>i&1) if and only if test(i...
Definition: ut0bitset.h:110
size_t size_bytes() const
Get the size of current bitset in bytes.
Definition: ut0bitset.h:203
B * m_data
The buffer containing the bitmap.
Definition: ut0bitset.h:228
void reset() const
Set all bits to false.
Definition: ut0bitset.h:100
B * data() const
Get the bitset's bytes buffer.
Definition: ut0bitset.h:207
bool test(size_t pos) const
Test if the specified bit is set or not.
Definition: ut0bitset.h:196
Bitset(B *data, size_t size_bytes)
Definition: ut0bitset.h:61
Bitset bytes_subspan(size_t byte_offset) const
Returns a wrapper around fragment of the buffer starting at pos, where pos is measured in bytes.
Definition: ut0bitset.h:76
void reset(size_t pos) const
Sets the specified bit to false.
Definition: ut0bitset.h:104
size_t m_size_bytes
The length of the buffer containing the bitmap in bytes.
Definition: ut0bitset.h:232
void copy_from(const byte *bitset) const
Copy a bits from other buffer into this one.
Definition: ut0bitset.h:83
Bitset()
Constructor.
Definition: ut0bitset.h:60
void set() const
Set all bits to true.
Definition: ut0bitset.h:97
constexpr static size_t NOT_FOUND
Value used by find_set to indicate it could not find a bit set to 1.
Definition: ut0bitset.h:118
Bitset bytes_subspan(size_t byte_offset, size_t bytes_count) const
Returns a wrapper around [pos,pos+len) fragment of the buffer, where pos and len are measured in byte...
Definition: ut0bitset.h:68
size_t find_set(size_t start_pos) const
Finds the smallest position which is set and is not smaller than start_pos.
Definition: ut0bitset.h:124
void set(size_t pos, bool v=true) const
Set the specified bit to the value 'bit'.
Definition: ut0bitset.h:90
static constexpr uint64_t to_uint64(const byte *bytes)
Converts 8 bytes to uint64_t value, such that (value>>i&1) equals the i-th bit, i....
Definition: ut0bitset.h:216
Fido Client Authentication nullptr
Definition: fido_client_plugin.cc:222
constexpr T div_ceil(T numerator, T denominator)
Computes the result of division rounded towards positive infinity.
Definition: ut0math.h:83
static int countr_zero(uint64_t x)
Portable replacement for std::countr_zero(uint64_t) from C++20.
Definition: ut0math.h:47
Version control for database, common definitions, and include files.
Debug utilities for Innobase.
#define ut_ad(EXPR)
Debug assertion.
Definition: ut0dbg.h:69
#define ut_a(EXPR)
Abort execution if EXPR does not evaluate to nonzero.
Definition: ut0dbg.h:57
Math functions.
static int all(site_def const *s, node_no node)
Definition: xcom_transport.cc:872