MySQL 8.0.40
Source Code Documentation
ut0sharded_bitset.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2023, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is also distributed with certain software (including but not
10limited to OpenSSL) that is licensed under separate terms, as designated in a
11particular file or component or in included license documentation. The authors
12of MySQL hereby grant you an additional permission to link the program and
13your derivative works with the separately licensed software that they have
14included with MySQL.
15
16This program is distributed in the hope that it will be useful, but WITHOUT
17ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19for more details.
20
21You should have received a copy of the GNU General Public License along with
22this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25*****************************************************************************/
26#pragma once
27#include <cstdint>
28#include <limits>
29#include "ut0bitset.h"
30#include "ut0new.h"
31namespace ut {
32/** A Sharded_bitset<SHARDS_COUNT>(n) is like a bitset<n> in that it represents
33a vector of n bits, which can be set(pos) or reset(pos) for 0<=pos<n.
34One difference is that the size n is specified at runtime via constructor.
35The other difference is that SHARDS_COUNT specifies sharding policy used and
36enforced by the caller:
37
38 shard_id = pos % SHARDS_COUNT
39
40which is then respected by this class, by ensuring no data-race is possible, as
41long as the caller ensures concurrent calls for two different pos from same
42shard are not possible. For example, if one thread calls set(pos1) and another
43calls set(pos2) in parallel, then it is fine, as long as
44
45 pos1 % SHARDS_COUNT != pos2 % SHARDS_COUNT
46
47This is the main reason to prefer this data structure to bitset, which doesn't
48give you such guarantee as bit from different shards can be in the same word.
49Another reason to use this data structure is because it has a very fast
50implementation of find_set_in_this_shard(pos) which finds smallest
51pos+SHARDS_COUNT*i which is set (for 0<=i). This is useful to quickly iterate
52over all bits which are set, while also respecting sharding policy.
53For example:
54 for (size_t shard=0; shard < SHARDS_COUNT; ++shard) {
55 mutex_enter(mutex[shard]);
56 for (size_t pos = find_set_in_this_shard(shard);
57 pos < n;
58 pos = find_set_in_this_shard(pos+SHARDS_COUNT)) {
59
60 cout << pos << " is set !" << endl;
61 }
62 mutex_exit(mutex[shard]);
63 }
64*/
65template <size_t SHARDS_COUNT>
67 private:
68 /** The bits for each shard are stored separately to avoid data-races,
69 false-sharing, and make linear scans within shard faster.
70 For similar reasons they are packed to aligned uint64_t words.
71 But, they are allocated as one big array, so instead of indirect pointers,
72 we can just compute where each shard starts or ends, @see get_shard() */
74 using words_allocator = typename decltype(words)::allocator_type;
75
76 /** How many words are devoted to each shard in words[]. All shards get equal
77 fragment of words[], even if n is not divisible by SHARDS_COUNT, and thus the
78 words_per_shard() might be slightly larger than necessary as we round up.
79 The region of words[] for a given shard is
80 words[shard*words_per_shard()]...words[(shard+1)*words_per_shard()-1]
81 @return number of items of words[] assigned to each shard */
82 size_t words_per_shard() const { return words.size() / SHARDS_COUNT; }
83
84 /**
85 @return a bitset wrapper around the fragment of words[] for specified shard */
86 Bitset<> get_shard(size_t shard_id) const {
87 const auto shard_bytes = words_per_shard() * 8;
88 return Bitset<>((byte *)words.data() + shard_bytes * shard_id, shard_bytes);
89 }
90
91 public:
92 /** Initializes a data structure capable of storing n bits.
93 Initializes all bits to unset.
94 @param[in] n The maximum position + 1.
95 @param[in] mem_key PSI memory key used to track allocations. */
97 : /* Each shard must have same length to keep the mapping simple.
98 The shard to which maximal number of positions from [0,n) belongs is
99 always the 0th shard, which needs to represent ceil(n/SHARDS_COUNT)
100 bits. */
101 words(ut::div_ceil(n, SHARDS_COUNT * 64) * SHARDS_COUNT, 0,
102 words_allocator{mem_key.m_key}) {}
103
104 /** Sets pos-th bit
105 @param[in] pos The position of the bit to be set to 1 */
106 void set(size_t pos) {
107 get_shard(pos % SHARDS_COUNT).set(pos / SHARDS_COUNT);
108 }
109
110 /** Resets pos-th bit
111 @param[in] pos The position of the bit to be reset to 0 */
112 void reset(size_t pos) {
113 get_shard(pos % SHARDS_COUNT).reset(pos / SHARDS_COUNT);
114 }
115
116 /** Finds a smallest position which is set and belongs to the same shard as
117 start_pos, and is not smaller than start_pos
118 @param[in] start_pos The position, such as passed to set(pos)
119 @return Smallest start_pos+SHARDS_COUNT*i which is set and 0<=i. In case no
120 bit set matching these criteria can be found, returns "infinity" */
121 size_t find_set_in_this_shard(size_t start_pos) {
122 const size_t shard_id = start_pos % SHARDS_COUNT;
123 const auto bs = get_shard(shard_id);
124 const size_t found = bs.find_set(start_pos / SHARDS_COUNT);
125 return found == bs.NOT_FOUND ? found : found * SHARDS_COUNT + shard_id;
126 }
127};
128} // namespace ut
A simple bitset wrapper class, which lets you access an existing range of bytes (not owned by it!...
Definition: ut0bitset.h:54
void reset() const
Set all bits to false.
Definition: ut0bitset.h:100
void set(size_t pos, bool v=true) const
Set the specified bit to the value 'bit'.
Definition: ut0bitset.h:90
A Sharded_bitset<SHARDS_COUNT>(n) is like a bitset<n> in that it represents a vector of n bits,...
Definition: ut0sharded_bitset.h:66
Sharded_bitset(size_t n, ut::PSI_memory_key_t mem_key)
Initializes a data structure capable of storing n bits.
Definition: ut0sharded_bitset.h:96
size_t find_set_in_this_shard(size_t start_pos)
Finds a smallest position which is set and belongs to the same shard as start_pos,...
Definition: ut0sharded_bitset.h:121
void reset(size_t pos)
Resets pos-th bit.
Definition: ut0sharded_bitset.h:112
typename decltype(words)::allocator_type words_allocator
Definition: ut0sharded_bitset.h:74
ut::vector< uint64_t > words
The bits for each shard are stored separately to avoid data-races, false-sharing, and make linear sca...
Definition: ut0sharded_bitset.h:73
size_t words_per_shard() const
How many words are devoted to each shard in words[].
Definition: ut0sharded_bitset.h:82
void set(size_t pos)
Sets pos-th bit.
Definition: ut0sharded_bitset.h:106
Bitset get_shard(size_t shard_id) const
Definition: ut0sharded_bitset.h:86
This file contains a set of libraries providing overloads for regular dynamic allocation routines whi...
Definition: aligned_alloc.h:48
constexpr T div_ceil(T numerator, T denominator)
Computes the result of division rounded towards positive infinity.
Definition: ut0math.h:83
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2875
Light-weight and type-safe wrapper around the PSI_memory_key that eliminates the possibility of intro...
Definition: ut0new.h:178
Utilities for bitset operations.
Dynamic memory allocation routines and custom allocators specifically crafted to support memory instr...
int n
Definition: xcom_base.cc:509