MySQL 9.1.0
Source Code Documentation
ut0bitset.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 1994, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ut0bitset.h
29 Utilities for bitset operations
30
31 Created 11/05/2018 Bin Su
32 ***********************************************************************/
33
34#ifndef ut0bitset_h
35#define ut0bitset_h
36#include <bit>
37#include <cstdint>
38#include <cstring>
39#include <limits>
40#include "univ.i"
41#include "ut0dbg.h"
42#include "ut0math.h"
43
44/** A simple bitset wrapper class, which lets you access an existing range of
45bytes (not owned by it!) as if it was a <tt>std::bitset</tt> or
46<tt>std::vector<bool></tt>.
47NOTE: Because it is a wrapper, its semantics are similar to <tt>std::span</tt>.
48For example <tt>const Bitset<></tt> can still let someone modify the bits via
49<tt>set()</tt> or <tt>reset()</tt>. If you want to prevent someone from editing
50the buffer, you'd need <tt>Bitset&lt;const byte></tt>. For same reason,
51<tt>bitset1=bitset2</tt> will just repoint <tt>bitset1</tt> to the same range of
52bytes as <tt>bitset2</tt> without copying any bits. If you want to copy the bits
53use <tt>bitset1.copy_from(bitset2.bitset())</tt> instead. */
54template <typename B = byte>
55class Bitset {
56 static_assert(std::is_same_v<std::decay_t<B>, byte>,
57 "Bitset<B> requires B to be a byte or const byte");
58
59 public:
60 /** Constructor */
63
64 /** Returns a wrapper around [pos,pos+len) fragment of the buffer, where pos
65 and len are measured in bytes
66 @param[in] byte_offset position of first byte of selected slice
67 @param[in] bytes_count length of the slice in bytes
68 @return Bitset wrapping the sub-array */
69 Bitset bytes_subspan(size_t byte_offset, size_t bytes_count) const {
70 ut_ad(byte_offset + bytes_count <= m_size_bytes);
71 return Bitset(m_data + byte_offset, bytes_count);
72 }
73 /** Returns a wrapper around fragment of the buffer starting at pos, where pos
74 is measured in bytes
75 @param[in] byte_offset position of first byte of selected slice
76 @return Bitset wrapping the sub-array */
77 Bitset bytes_subspan(size_t byte_offset) const {
78 ut_ad(byte_offset <= m_size_bytes);
79 return bytes_subspan(byte_offset, m_size_bytes - byte_offset);
80 }
81
82 /** Copy a bits from other buffer into this one
83 @param[in] bitset byte array to bits copy from */
84 void copy_from(const byte *bitset) const {
85 memcpy(m_data, bitset, m_size_bytes);
86 }
87
88 /** Set the specified bit to the value 'bit'
89 @param[in] pos specified bit
90 @param[in] v true or false */
91 void set(size_t pos, bool v = true) const {
92 ut_ad(pos / 8 < m_size_bytes);
93 m_data[pos / 8] &= ~(0x1 << (pos & 0x7));
94 m_data[pos / 8] |= (static_cast<byte>(v) << (pos & 0x7));
95 }
96
97 /** Set all bits to true */
98 void set() const { memset(m_data, 0xFF, m_size_bytes); }
99
100 /** Set all bits to false */
101 void reset() const { memset(m_data, 0, m_size_bytes); }
102
103 /** Sets the specified bit to false
104 @param[in] pos specified bit */
105 void reset(size_t pos) const { set(pos, false); }
106
107 /** Converts the content of the bitset to an uint64_t value, such that
108 (value>>i&1) if and only if test(i).
109 The m_size must be at most 8 bytes.
110 @return content of the bitset as uint64_t mapping i-th bit to 2^i */
111 uint64_t to_uint64() const {
112 ut_a(m_size_bytes <= 8);
113 byte bytes[8] = {};
114 memcpy(bytes, m_data, m_size_bytes);
115 return to_uint64(bytes);
116 }
117 /** Value used by find_set to indicate it could not find a bit set to 1.
118 It is guaranteed to be larger than the size of the vector. */
119 constexpr static size_t NOT_FOUND = std::numeric_limits<size_t>::max();
120
121 /** Finds the smallest position which is set and is not smaller than start_pos
122 @param[in] start_pos The position from which to start the search.
123 @return Smallest pos for which test(pos)==true and start_pos<=pos. In case
124 there's no such pos, returns NOT_FOUND */
125 size_t find_set(size_t start_pos) const {
126 /* The reason this function is so complicated is because it is meant to be
127 fast for long sparse bitsets, so it's main part is to iterate over whole
128 uint64_t words, ignoring all that are equal to 0. Alas, in general m_bitset
129 doesn't have to be aligned to word boundary, neither m_bitse + m_size must
130 end at word boundary, worse still m_size could be below 8. Thus we consider
131 following cases:
132 a) start_pos out of bounds -> return NOT_FOUND
133 b) m_size <=8 -> convert the few bytes into uint64_t and use countr_zero
134 c) m_bitset aligned -> iter over whole words, handle unfinished word
135 recursively (a or b)
136 d) unaligned m_bitset -> handle partial word recursively (b), and the
137 aligned rest recursively (a, b or c).
138 Note that in most important usages of this class m_bitset is aligned. */
139 if (m_size_bytes * 8 <= start_pos) {
140 return NOT_FOUND;
141 }
142 if (m_size_bytes <= 8) {
143 const uint64_t all = to_uint64();
144 const uint64_t earlier = (uint64_t{1} << start_pos) - 1;
145 const uint64_t unseen = all & ~earlier;
146 if (unseen) {
147 return std::countr_zero(unseen);
148 }
149 return NOT_FOUND;
150 }
151 const auto start_addr = reinterpret_cast<uintptr_t>(m_data);
152 const size_t start_word_byte_idx =
153 ut::div_ceil(start_addr, uintptr_t{8}) * 8 - start_addr;
154 const auto translate_result = [&start_pos, this](size_t offset) {
155 auto found = bytes_subspan(offset / 8).find_set(start_pos - offset);
156 return found == NOT_FOUND ? found : found + offset;
157 };
158 if (start_word_byte_idx == 0) {
159 // the middle of the m_bitset consists of uint64_t elements
160 auto *words = reinterpret_cast<const uint64_t *>(m_data);
161 size_t word_idx = start_pos / 64;
162 const auto full_words_count = m_size_bytes / 8;
163 if (word_idx < full_words_count) {
164 const uint64_t earlier = (uint64_t{1} << start_pos % 64) - 1;
165 const uint64_t unseen = to_uint64(m_data + word_idx * 8) & ~earlier;
166 if (unseen) {
167 return word_idx * 64 + std::countr_zero(unseen);
168 }
169 // otherwise find first non-empty word
170 ++word_idx;
171 while (word_idx < full_words_count) {
172 if (words[word_idx]) {
173 return word_idx * 64 +
174 std::countr_zero(to_uint64(m_data + word_idx * 8));
175 }
176 ++word_idx;
177 }
178 start_pos = full_words_count * 64;
179 }
180 return translate_result(full_words_count * 64);
181 }
182 // this only occurs for m_bitset not aligned to 64-bit
183 if (start_pos < start_word_byte_idx * 8) {
184 const auto found =
185 bytes_subspan(0, start_word_byte_idx).find_set(start_pos);
186 if (found < NOT_FOUND) {
187 return found;
188 }
189 start_pos = start_word_byte_idx * 8;
190 }
191 return translate_result(start_word_byte_idx * 8);
192 }
193
194 /** Test if the specified bit is set or not
195 @param[in] pos the specified bit
196 @return True if this bit is set, otherwise false */
197 bool test(size_t pos) const {
198 ut_ad(pos / 8 < m_size_bytes);
199 return ((m_data[pos / 8] >> (pos & 0x7)) & 0x1);
200 }
201
202 /** Get the size of current bitset in bytes
203 @return the size of the bitset */
204 size_t size_bytes() const { return (m_size_bytes); }
205
206 /** Get the bitset's bytes buffer
207 @return current bitset */
208 B *data() const { return (m_data); }
209
210 private:
211 /** Converts 8 bytes to uint64_t value, such that
212 (value>>i&1) equals the i-th bit, i.e. (bytes[i/8]>>i%8 & 1).
213 For example, the returned value equals bytes[0] modulo 256.
214 @param[in] bytes the bytes to convert
215 @return uint64_t created by concatenating the bytes in the right order:
216 on Little-Endian it's an identity, on Big-Endian it's std::byteswap. */
217 static constexpr uint64_t to_uint64(const byte *bytes) {
218 /* This pattern this gets recognized by gcc as identity on Little-Endian.
219 The benefit of writing it this way is not only that it works on Big-Endian,
220 but also that it doesn't require the address to be aligned. */
221 return ((uint64_t)(bytes[7]) << 7 * 8) | ((uint64_t)(bytes[6]) << 6 * 8) |
222 ((uint64_t)(bytes[5]) << 5 * 8) | ((uint64_t)(bytes[4]) << 4 * 8) |
223 ((uint64_t)(bytes[3]) << 3 * 8) | ((uint64_t)(bytes[2]) << 2 * 8) |
224 ((uint64_t)(bytes[1]) << 1 * 8) | ((uint64_t)(bytes[0]) << 0 * 8);
225 }
226
227 /** The buffer containing the bitmap. First bit is the lowest bit of the first
228 byte of this buffer. */
230
231 /** The length of the buffer containing the bitmap in bytes. Number of bits is
232 8 times larger than this */
234};
235
236#endif
Kerberos Client Authentication nullptr
Definition: auth_kerberos_client_plugin.cc:251
A simple bitset wrapper class, which lets you access an existing range of bytes (not owned by it!...
Definition: ut0bitset.h:55
uint64_t to_uint64() const
Converts the content of the bitset to an uint64_t value, such that (value>>i&1) if and only if test(i...
Definition: ut0bitset.h:111
size_t size_bytes() const
Get the size of current bitset in bytes.
Definition: ut0bitset.h:204
B * m_data
The buffer containing the bitmap.
Definition: ut0bitset.h:229
void reset() const
Set all bits to false.
Definition: ut0bitset.h:101
B * data() const
Get the bitset's bytes buffer.
Definition: ut0bitset.h:208
bool test(size_t pos) const
Test if the specified bit is set or not.
Definition: ut0bitset.h:197
Bitset(B *data, size_t size_bytes)
Definition: ut0bitset.h:62
Bitset bytes_subspan(size_t byte_offset) const
Returns a wrapper around fragment of the buffer starting at pos, where pos is measured in bytes.
Definition: ut0bitset.h:77
void reset(size_t pos) const
Sets the specified bit to false.
Definition: ut0bitset.h:105
size_t m_size_bytes
The length of the buffer containing the bitmap in bytes.
Definition: ut0bitset.h:233
void copy_from(const byte *bitset) const
Copy a bits from other buffer into this one.
Definition: ut0bitset.h:84
Bitset()
Constructor.
Definition: ut0bitset.h:61
void set() const
Set all bits to true.
Definition: ut0bitset.h:98
constexpr static size_t NOT_FOUND
Value used by find_set to indicate it could not find a bit set to 1.
Definition: ut0bitset.h:119
Bitset bytes_subspan(size_t byte_offset, size_t bytes_count) const
Returns a wrapper around [pos,pos+len) fragment of the buffer, where pos and len are measured in byte...
Definition: ut0bitset.h:69
size_t find_set(size_t start_pos) const
Finds the smallest position which is set and is not smaller than start_pos.
Definition: ut0bitset.h:125
void set(size_t pos, bool v=true) const
Set the specified bit to the value 'bit'.
Definition: ut0bitset.h:91
static constexpr uint64_t to_uint64(const byte *bytes)
Converts 8 bytes to uint64_t value, such that (value>>i&1) equals the i-th bit, i....
Definition: ut0bitset.h:217
constexpr T div_ceil(T numerator, T denominator)
Computes the result of division rounded towards positive infinity.
Definition: ut0math.h:49
Version control for database, common definitions, and include files.
Debug utilities for Innobase.
#define ut_ad(EXPR)
Debug assertion.
Definition: ut0dbg.h:105
#define ut_a(EXPR)
Abort execution if EXPR does not evaluate to nonzero.
Definition: ut0dbg.h:93
Math functions.