MySQL 8.3.0
Source Code Documentation
ut0lock_free_hash.h
1/*****************************************************************************
2
3Copyright (c) 2015, 2023, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is also distributed with certain software (including but not
10limited to OpenSSL) that is licensed under separate terms, as designated in a
11particular file or component or in included license documentation. The authors
12of MySQL hereby grant you an additional permission to link the program and
13your derivative works with the separately licensed software that they have
14included with MySQL.
15
16This program is distributed in the hope that it will be useful, but WITHOUT
17ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19for more details.
20
21You should have received a copy of the GNU General Public License along with
22this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25*****************************************************************************/
26
27/** @file include/ut0lock_free_hash.h
28 Lock free hash implementation
29
30 Created Mar 16, 2015 Vasil Dimov
31 *******************************************************/
32
33#ifndef ut0lock_free_hash_h
34#define ut0lock_free_hash_h
35
36#define BOOST_ATOMIC_NO_LIB
37
38#include "univ.i"
39
40#include <atomic>
41#include <list>
42
43#include "os0numa.h" /* os_getcpu() */
44#include "os0thread.h" /* ut::this_thread_hash */
45#include "ut0cpu_cache.h" /* Cache_aligned<T> */
46#include "ut0mutex.h" /* ib_mutex_t */
47#include "ut0new.h" /* UT_NEW*(), ut::delete_*() */
48#include "ut0rnd.h" /* ut::hash_uint64() */
49
50/** An interface class to a basic hash table, which ut_lock_free_hash_t implements. */
51class ut_hash_interface_t {
52 public:
53 /** The value that is returned when the searched for key is not
54 found. */
55 static const int64_t NOT_FOUND = INT64_MAX;
56
57 /** Destructor. */
58 virtual ~ut_hash_interface_t() = default;
59
60 /** Get the value mapped to a given key.
61 @param[in] key key to look for
62 @return the value that corresponds to key or NOT_FOUND. */
63 virtual int64_t get(uint64_t key) const = 0;
64
65 /** Set the value for a given key, either inserting a new (key, val)
66 tuple or overwriting an existing value.
67 @param[in] key key whose value to set
68 @param[in] val value to be set */
69 virtual void set(uint64_t key, int64_t val) = 0;
70
71 /** Delete a (key, val) pair from the hash.
72 @param[in] key key whose pair to delete */
73 virtual void del(uint64_t key) = 0;
74
75 /** Increment the value for a given key by 1 or insert a new tuple
76 (key, 1).
77 @param[in] key key whose value to increment or insert as 1 */
78 virtual void inc(uint64_t key) = 0;
79
80 /** Decrement the value of a given key by 1 or insert a new tuple
81 (key, -1).
82 @param[in] key key whose value to decrement */
83 virtual void dec(uint64_t key) = 0;
84
85#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
86 /** Print statistics about how many searches have been done on the hash
87 and how many collisions. */
88 virtual void print_stats() = 0;
89#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
90};
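/* A minimal usage sketch of the contract above. The helper name and the
concrete values are hypothetical, for illustration only: any implementation of
ut_hash_interface_t (such as ut_lock_free_hash_t below) should behave this way. */
inline void ut_hash_interface_usage_sketch(ut_hash_interface_t &h) {
  h.set(10, 42); /* insert (10, 42) */
  ut_ad(h.get(10) == 42);
  h.inc(10);     /* (10, 43) */
  h.dec(10);     /* (10, 42) */
  h.del(10);     /* subsequent lookups report "not found" */
  ut_ad(h.get(10) == ut_hash_interface_t::NOT_FOUND);
  ut_ad(h.get(99) == ut_hash_interface_t::NOT_FOUND); /* never inserted */
}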
91
92/** Lock free ref counter. It uses a few counter variables internally to improve
93performance on machines with lots of CPUs. */
94class ut_lock_free_cnt_t {
95 public:
96 /** Constructor. */
97 ut_lock_free_cnt_t() {
98 /* The initial value of std::atomic depends on C++ standard and the way
99 the containing object was initialized, so make sure it's always zero. */
100 for (size_t i = 0; i < m_cnt.size(); i++) {
101 m_cnt[i].store(0);
102 }
103 }
104
105 class handle_t {
106 public:
107 handle_t() : m_counter{nullptr} {}
108
109 handle_t(std::atomic<uint64_t> *counter) : m_counter{counter} {
110 m_counter->fetch_add(1);
111 }
112
113 handle_t(handle_t &&other) noexcept : m_counter{other.m_counter} {
114 other.m_counter = nullptr;
115 }
116
117 explicit operator bool() const noexcept { return m_counter != nullptr; }
118
119 ~handle_t() {
120 if (m_counter != nullptr) {
121 m_counter->fetch_sub(1);
122 }
123 }
124
125 private:
126 std::atomic<uint64_t> *m_counter;
127 };
128
129 /** Increment the counter. */
130 handle_t reference() { return handle_t{&m_cnt[n_cnt_index()]}; }
131
132 /** Wait until all previously existing references get released.
133 This function assumes that the caller ensured that no new references
134 should appear (or rather: no long-lived references - there can be threads which
135 call reference(), realize the object should no longer be referenced and
136 immediately release it)
137 */
138 void await_release_of_old_references() const {
139 for (size_t i = 0; i < m_cnt.size(); i++) {
140 while (m_cnt[i].load()) {
141 std::this_thread::yield();
142 }
143 }
144 }
145
146 private:
147 /** Derive an appropriate index in m_cnt[] for the current thread.
148 @return index in m_cnt[] for this thread to use */
149 size_t n_cnt_index() const {
150 size_t cpu;
151
152#ifdef HAVE_OS_GETCPU
153 cpu = static_cast<size_t>(os_getcpu());
154#else /* HAVE_OS_GETCPU */
155 cpu = ut::this_thread_hash;
156#endif /* HAVE_OS_GETCPU */
157
158 return cpu % m_cnt.size();
159 }
160
161 /** The shards of the counter.
162 We picked a number that should be larger than, or close to, the number of
163 CPUs on the system, but small enough that
164 await_release_of_old_references() finishes in reasonable time, and that the
165 size (256 * 64B = 16 KiB) is not too large.
166 We pad the atomics to avoid false sharing. In particular, we hope that on
167 platforms which HAVE_OS_GETCPU the same CPU will always fetch the same counter
168 and thus will store it in its local cache. This should also help on NUMA
169 architectures by avoiding the cost of synchronizing caches between CPUs.*/
170 std::array<ut::Cacheline_aligned<std::atomic<uint64_t>>, 256> m_cnt;
171};
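/* Illustrative sketch of how this counter is meant to be used (the helper is
hypothetical; within this file the same pattern appears in
ut_lock_free_list_node_t::begin_access() and await_release_of_old_references()).
A reader pins the protected object by keeping a handle_t alive; a destroyer
first ensures no new long-lived references can appear and then waits for the
old ones to go away. */
inline void ut_lock_free_cnt_usage_sketch() {
  ut_lock_free_cnt_t cnt;
  {
    ut_lock_free_cnt_t::handle_t handle = cnt.reference(); /* one shard += 1 */
    /* ... read the object protected by 'cnt' ... */
  } /* handle destroyed: the same shard -= 1 */

  /* Once no new long-lived references can be taken: */
  cnt.await_release_of_old_references(); /* yields until every shard reads 0 */
}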
172
173/** A node in a linked list of arrays. The pointer to the next node is
174atomically set (CAS) when a next element is allocated. */
175template <typename T>
176class ut_lock_free_list_node_t {
177 public:
178 typedef ut_lock_free_list_node_t<T> *next_t;
179
180 /** Constructor.
181 @param[in] n_elements number of elements to create */
182 explicit ut_lock_free_list_node_t(size_t n_elements)
183 : m_base{ut::make_unique<T[]>(
184 ut::make_psi_memory_key(mem_key_ut_lock_free_hash_t), n_elements)},
185 m_n_base_elements{n_elements},
186 m_pending_free{false},
187 m_next{nullptr} {
188 ut_ad(n_elements > 0);
189 }
190
191 static ut_lock_free_list_node_t *alloc(size_t n_elements) {
192 return ut::aligned_new_withkey<ut_lock_free_list_node_t<T>>(
193 ut::make_psi_memory_key(mem_key_ut_lock_free_hash_t),
194 alignof(ut_lock_free_list_node_t<T>), n_elements);
195 }
196
197 static void dealloc(ut_lock_free_list_node_t *ptr) {
198 ut::aligned_delete(ptr);
199 }
200
201 /** Create and append a new array to this one and store a pointer
202 to it in 'm_next'. This is done in a way that multiple threads can
203 attempt this at the same time and only one will succeed. When this
204 method returns, the caller can be sure that the job is done (either
205 by this or another thread).
206 @param[in] deleted_val the constant that designates that
207 a value is deleted
208 @param[out] grown_by_this_thread set to true if the next
209 array was created and appended by this thread; set to false if
210 created and appended by another thread.
211 @return the next array, appended by this or another thread */
212 next_t grow(int64_t deleted_val, bool *grown_by_this_thread) {
213 size_t new_size;
214
215 if (m_n_base_elements > 1024 &&
216 n_deleted(deleted_val) > m_n_base_elements * 3 / 4) {
217 /* If there are too many deleted elements (more than
218 75%), then do not double the size. */
219 new_size = m_n_base_elements;
220 } else {
221 new_size = m_n_base_elements * 2;
222 }
223
224 next_t new_arr = alloc(new_size);
225
226 /* Publish the allocated entry. If somebody did this in the
227 meantime then just discard the allocated entry and do
228 nothing. */
229 next_t expected = nullptr;
230 if (!m_next.compare_exchange_strong(expected, new_arr)) {
231 /* Somebody just did that. */
232 dealloc(new_arr);
233
234 /* 'expected' has the current value which
235 must be != NULL because the CAS failed. */
236 ut_ad(expected != nullptr);
237
238 *grown_by_this_thread = false;
239
240 return (expected);
241 } else {
242 *grown_by_this_thread = true;
243 return (new_arr);
244 }
245 }
246
247 /* This object is only ever destroyed after it is removed from the
248 list of arrays in the hash table (which means that new threads cannot
249 start using it) and the number of threads that use it has decreased
250 to zero. */
251
252 /** Mark the beginning of an access to this object. Used to prevent a
253 destruction of the array pointed to by m_base while our thread is accessing it.
254 @return A handle which protects the m_base as long as the handle is not
255 destructed. If the handle is {} (==false), the access was denied: this object
256 is to be removed from the list and thus new access to it is not allowed.
257 The caller should retry from the head of the list. */
258 ut_lock_free_cnt_t::handle_t begin_access() {
259 auto handle = m_n_ref.reference();
260
261 if (m_pending_free.load()) {
262 /* Don't allow access if freeing is pending. Ie if
263 another thread is waiting for readers to go away
264 before it can free the m_base's member of this
265 object. */
266 return {};
267 }
268
269 return handle;
270 }
271
272 /** Wait until all previously held references are released */
273 void await_release_of_old_references() {
274 m_n_ref.await_release_of_old_references();
275 }
276
277 /** Base array. */
278 ut::unique_ptr<T[]> m_base;
279
280 /** Number of elements in 'm_base'. */
281 size_t m_n_base_elements;
282
283 /** Indicate whether some thread is waiting for readers to go away
284 before it can free the memory occupied by the m_base member. */
285 std::atomic_bool m_pending_free;
286
287 /** Pointer to the next node if any or NULL. This begins its life
288 as NULL and is only changed once to some real value. Never changed
289 to another value after that. */
290 std::atomic<next_t> m_next;
291
292 private:
293 /** Count the number of deleted elements. The value returned could
294 be inaccurate because it is obtained without any locks.
295 @param[in] deleted_val the constant that designates that
296 a value is deleted
297 @return the number of deleted elements */
298 size_t n_deleted(int64_t deleted_val) const {
299 size_t ret = 0;
300
301 for (size_t i = 0; i < m_n_base_elements; i++) {
302 const int64_t val = m_base[i].m_val.load(std::memory_order_relaxed);
303
304 if (val == deleted_val) {
305 ret++;
306 }
307 }
308
309 return (ret);
310 }
311
312 /** Counter for the current number of readers and writers to this
313 object. This object is destroyed only after it is removed from the
314 list, so that no new readers or writers may arrive, and after this
315 counter has dropped to zero. */
316 ut_lock_free_cnt_t m_n_ref;
317};
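/* Illustrative reader-side sketch (hypothetical helper): any access to m_base
must be bracketed by begin_access(), and an empty handle means the node is
about to be retired, in which case the caller restarts from the current head
of the list. ut_lock_free_hash_t::get_tuple() below follows exactly this
protocol. */
template <typename T>
inline bool ut_lock_free_list_node_read_sketch(ut_lock_free_list_node_t<T> *node) {
  auto handle = node->begin_access();
  if (!handle) {
    /* m_pending_free is set; the memory behind m_base may be freed soon. */
    return false;
  }
  /* Safe to read node->m_base[0 .. node->m_n_base_elements - 1] while
  'handle' is alive. */
  return true;
}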
318
319/** Lock free hash table. It stores (key, value) pairs where both the key
320and the value are of integer type.
321* The possible keys are:
322 * UNUSED: a (key = UNUSED, val) tuple means that it is empty/unused. This is
323 the initial state of all keys.
324 * AVOID: a (key = AVOID, val = any) tuple means that this tuple is disabled,
325 does not contain real data and should be avoided by searches and
326 inserts. This is used when migrating all elements of an array to the
327 next array.
328 * real key: anything other than the above means this is a real key,
329 specified by the user.
330
331* The possible values are:
332 * NOT_FOUND: a (key, val = NOT_FOUND) tuple means that it is just being
333 inserted and returning "not found" is ok. This is the initial state of all
334 values.
335 * DELETED: a (key, val = DELETED) tuple means that a tuple with this key
336 existed before but was deleted at some point. Searches for this key return
337 "not found" and inserts reuse the tuple, replacing the DELETED value with
338 something else.
339 * GOTO_NEXT_ARRAY: a (key, val = GOTO_NEXT_ARRAY) tuple means that the
340 searches for this tuple (get and insert) should go to the next array and
341 repeat the search there. Used when migrating all tuples from one array to
342 the next.
343 * real value: anything other than the above means this is a real value,
344 specified by the user.
345
346* Transitions for keys (a real key is anything other than UNUSED and AVOID):
347 * UNUSED -> real key -- allowed
348 * UNUSED -> AVOID -- allowed
349 anything else is not allowed:
350 * real key -> UNUSED -- not allowed
351 * real key -> AVOID -- not allowed
352 * real key -> another real key -- not allowed
353 * AVOID -> UNUSED -- not allowed
354 * AVOID -> real key -- not allowed
355
356* Transitions for values (a real value is anything other than NOT_FOUND,
357 DELETED and GOTO_NEXT_ARRAY):
358 * NOT_FOUND -> real value -- allowed
359 * NOT_FOUND -> DELETED -- allowed
360 * real value -> another real value -- allowed
361 * real value -> DELETED -- allowed
362 * real value -> GOTO_NEXT_ARRAY -- allowed
363 * DELETED -> real value -- allowed
364 * DELETED -> GOTO_NEXT_ARRAY -- allowed
365 anything else is not allowed:
366 * NOT_FOUND -> GOTO_NEXT_ARRAY -- not allowed
367 * real value -> NOT_FOUND -- not allowed
368 * DELETED -> NOT_FOUND -- not allowed
369 * GOTO_NEXT_ARRAY -> real value -- not allowed
370 * GOTO_NEXT_ARRAY -> NOT_FOUND -- not allowed
371 * GOTO_NEXT_ARRAY -> DELETED -- not allowed
372*/
373class ut_lock_free_hash_t : public ut_hash_interface_t {
374 public:
375 /** Constructor. Not thread safe.
376 @param[in] initial_size number of elements to allocate
377 initially. Must be a power of 2, greater than 0.
378 @param[in] del_when_zero if true then automatically delete a
379 tuple from the hash if, due to an increment or decrement, its value becomes
380 zero. */
381 explicit ut_lock_free_hash_t(size_t initial_size, bool del_when_zero)
382 : m_del_when_zero(del_when_zero)
383#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
384 ,
385 m_n_search(0),
386 m_n_search_iterations(0)
387#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
388 {
389 ut_a(initial_size > 0);
390 ut_a(ut_is_2pow(initial_size));
391
392 m_data.store(arr_node_t::alloc(initial_size));
393
394 mutex_create(LATCH_ID_LOCK_FREE_HASH, &m_optimize_latch);
395
396 m_hollow_objects = ut::new_withkey<hollow_t>(
397 ut::make_psi_memory_key(mem_key_ut_lock_free_hash_t),
398 hollow_alloc_t(mem_key_ut_lock_free_hash_t));
399 }
400
401 /** Destructor. Not thread safe. */
402 ~ut_lock_free_hash_t() override {
403 mutex_destroy(&m_optimize_latch);
404
405 arr_node_t *arr = m_data.load();
406
407 do {
408 arr_node_t *next = arr->m_next.load();
409
411
412 arr = next;
413 } while (arr != nullptr);
414
415 while (!m_hollow_objects->empty()) {
416 arr_node_t::dealloc(m_hollow_objects->front());
417 m_hollow_objects->pop_front();
418 }
419 ut::delete_(m_hollow_objects);
420 }
421
422 /** Get the value mapped to a given key.
423 @param[in] key key to look for
424 @return the value that corresponds to key or NOT_FOUND. */
425 int64_t get(uint64_t key) const override {
426 ut_ad(key != UNUSED);
427 ut_ad(key != AVOID);
428
429 arr_node_t *arr = m_data.load();
430
431 for (;;) {
432 auto handle_and_tuple{get_tuple(key, &arr)};
433 const auto tuple = handle_and_tuple.second;
434
435 if (tuple == nullptr) {
436 return (NOT_FOUND);
437 }
438
439 /* Here if another thread is just setting this key
440 for the first time, then the tuple could be
441 (key, NOT_FOUND) (remember all vals are initialized
442 to NOT_FOUND initially) in which case we will return
443 NOT_FOUND below which is fine. */
444
445 int64_t v = tuple->m_val.load(std::memory_order_relaxed);
446
447 if (v == DELETED) {
448 return (NOT_FOUND);
449 } else if (v != GOTO_NEXT_ARRAY) {
450 return (v);
451 }
452
453 /* Prevent reorder of the below m_next.load() with
454 the above m_val.load().
455 We want to be sure that if m_val is GOTO_NEXT_ARRAY,
456 then the next array exists. */
457
458 arr = arr->m_next.load();
459 }
460 }
461
462 /** Set the value for a given key, either inserting a new (key, val)
463 tuple or overwriting an existing value. If two threads call this
464 method at the same time with the same key, but different val, then when
465 both methods have finished executing the value will be one of the
466 two, but it is nondeterministic which one. E.g.
467 Thread 1: set(key, val_a)
468 Thread 2: set(key, val_b)
469 when both have finished, then a tuple with the given key will be
470 present with value either val_a or val_b.
471 @param[in] key key whose value to set
472 @param[in] val value to be set */
473 void set(uint64_t key, int64_t val) override {
474 ut_ad(key != UNUSED);
475 ut_ad(key != AVOID);
476 ut_ad(val != NOT_FOUND);
477 ut_ad(val != DELETED);
478 ut_ad(val != GOTO_NEXT_ARRAY);
479
480 insert_or_update(key, val, false, m_data.load());
481 }
482
483 /** Delete a (key, val) pair from the hash.
484 If this gets called concurrently with get(), inc(), dec() or set(),
485 then to the caller it will look like the calls executed in isolation,
486 the hash structure itself will not be damaged, but it is undefined in
487 what order the calls will be executed. For example:
488 Let this tuple exist in the hash: (key == 5, val == 10)
489 Thread 1: inc(key == 5)
490 Thread 2: del(key == 5)
491 [1] If inc() executes first then the tuple will become
492 (key == 5, val == 11) and then del() will make it
493 (key == 5, val == DELETED), which get()s for key == 5 will return as
494 NOT_FOUND.
495 [2] If del() executes first then the tuple will become
496 (key == 5, val == DELETED) and then inc() will change it to
497 (key == 5, value == 1).
498 It is undefined which one of [1] or [2] will happen. It is up to the
499 caller to accept this behavior or prevent it at a higher level.
500 @param[in] key key whose pair to delete */
501 void del(uint64_t key) override {
502 ut_ad(key != UNUSED);
503 ut_ad(key != AVOID);
504
505 arr_node_t *arr = m_data.load();
506
507 for (;;) {
508 auto handle_and_tuple{get_tuple(key, &arr)};
509 const auto tuple = handle_and_tuple.second;
510
511 if (tuple == nullptr) {
512 /* Nothing to delete. */
513 return;
514 }
515
516 int64_t v = tuple->m_val.load(std::memory_order_relaxed);
517
518 for (;;) {
519 if (v == GOTO_NEXT_ARRAY) {
520 break;
521 }
522
523 if (tuple->m_val.compare_exchange_strong(v, DELETED)) {
524 return;
525 }
526
527 /* CAS stored the most recent value of 'm_val'
528 into 'v'. */
529 }
530
531 /* Prevent reorder of the below m_next.load() with the
532 above m_val.load() or the load from
533 m_val.compare_exchange_strong().
534 We want to be sure that if m_val is GOTO_NEXT_ARRAY,
535 then the next array exists. */
536
537 arr = arr->m_next.load();
538 }
539 }
540
541 /** Increment the value for a given key by 1 or insert a new tuple
542 (key, 1).
543 If two threads call this method at the same time with the same key,
544 then it is guaranteed that when both calls have finished, the value
545 will be incremented by 2.
546 If two threads call this method and set() at the same time with the
547 same key, it is nondeterministic whether the value will be what was
548 given to set() or what was given to set() + 1. E.g.
549 Thread 1: set(key, val)
550 Thread 2: inc(key)
551 or
552 Thread 1: inc(key)
553 Thread 2: set(key, val)
554 when both have finished the value will be either val or val + 1.
555 @param[in] key key whose value to increment or insert as 1 */
556 void inc(uint64_t key) override {
557 ut_ad(key != UNUSED);
558 ut_ad(key != AVOID);
559
560 insert_or_update(key, 1, true, m_data.load());
561 }
562
563 /** Decrement the value of a given key by 1 or insert a new tuple
564 (key, -1).
565 With respect to calling this together with set(), inc() or dec() the
566 same applies as with inc(), see its comment. The only guarantee is
567 that the calls will execute in isolation, but the order in which they
568 will execute is nondeterministic.
569 @param[in] key key whose value to decrement */
570 void dec(uint64_t key) override {
571 ut_ad(key != UNUSED);
572 ut_ad(key != AVOID);
573
574 insert_or_update(key, -1, true, m_data.load());
575 }
576
577#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
578 /** Print statistics about how many searches have been done on the hash
579 and how many collisions. */
580 void print_stats() {
581 const ulint n_search = m_n_search;
582 const ulint n_search_iterations = m_n_search_iterations;
583
584 ib::info info(ER_IB_MSG_LOCK_FREE_HASH_USAGE_STATS);
585 info << "Lock free hash usage stats: number of searches=" << n_search
586 << ", number of search iterations=" << n_search_iterations;
587 if (n_search != 0) {
588 info << "average iterations per search: "
589 << (double)n_search_iterations / n_search;
590 }
591 }
592#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
593
594 private:
595 /** A key == UNUSED designates that this cell in the array is empty. */
596 static const uint64_t UNUSED = UINT64_MAX;
597
598 /** A key == AVOID designates an unusable cell. This cell of the array
599 has been empty (key == UNUSED), but was then marked as AVOID in order
600 to prevent new inserts into it. Searches should treat this like
601 UNUSED (ie if they encounter it before the key they are searching for
602 then stop the search and declare 'not found'). */
603 static const uint64_t AVOID = UNUSED - 1;
604
605 /** A val == DELETED designates that this cell in the array has been
606 used in the past, but it was deleted later. Searches should return
607 NOT_FOUND when they encounter it. */
608 static const int64_t DELETED = NOT_FOUND - 1;
609
610 /** A val == GOTO_NEXT_ARRAY designates that this tuple (key, whatever)
611 has been moved to the next array. The search for it should continue
612 there. */
613 static const int64_t GOTO_NEXT_ARRAY = DELETED - 1;
614
615 /** (key, val) tuple type. */
616 struct key_val_t {
617 key_val_t() : m_key(UNUSED), m_val(NOT_FOUND) {}
618
619 /** Key. */
620 std::atomic<uint64_t> m_key;
621
622 /** Value. */
623 std::atomic<int64_t> m_val;
624 };
625
626 /** An array node in the hash. The hash table consists of a linked
627 list of such nodes. */
628 typedef ut_lock_free_list_node_t<key_val_t> arr_node_t;
629
630 /** A hash function used to map a key to its suggested position in the
631 array. A linear search to the right is done after this position to find
632 the tuple with the given key or find a tuple with key == UNUSED or
633 AVOID which means that the key is not present in the array.
634 @param[in] key key to map into a position
635 @param[in] arr_size number of elements in the array
636 @return a position (index) in the array where the tuple is guessed
637 to be */
638 size_t guess_position(uint64_t key, size_t arr_size) const {
639 /* Implement a better hashing function to map
640 [0, UINT64_MAX] -> [0, arr_size - 1] if this one turns
641 out to generate too many collisions. */
642
643 /* arr_size is a power of 2. */
644 return (static_cast<size_t>(ut::hash_uint64(key) & (arr_size - 1)));
645 }
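/* Worked example (values are hypothetical): with arr_size == 8 and
guess_position(key, 8) == 6, the callers below probe the cells in the order
6, 7, 0, 1, 2, 3, 4, 5, because every index i in [start, start + arr_size)
is reduced with i & (arr_size - 1), which for a power-of-two size is the
same as i % arr_size. */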
646
647 /** Get the array cell of a key from a given array.
648 @param[in] arr array to search into
649 @param[in] arr_size number of elements in the array
650 @param[in] key search for a tuple with this key
651 @return pointer to the array cell or NULL if not found */
652 key_val_t *get_tuple_from_array(key_val_t *arr, size_t arr_size,
653 uint64_t key) const {
654#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
655 ++m_n_search;
656#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
657
658 const size_t start = guess_position(key, arr_size);
659 const size_t end = start + arr_size;
660
661 for (size_t i = start; i < end; i++) {
662#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
663 ++m_n_search_iterations;
664#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
665
666 /* arr_size is a power of 2. */
667 const size_t cur_pos = i & (arr_size - 1);
668
669 key_val_t *cur_tuple = &arr[cur_pos];
670
671 const uint64_t cur_key = cur_tuple->m_key.load(std::memory_order_relaxed);
672
673 if (cur_key == key) {
674 return (cur_tuple);
675 } else if (cur_key == UNUSED || cur_key == AVOID) {
676 return (nullptr);
677 }
678 }
679
680 return (nullptr);
681 }
682
683 /** Get the array cell of a key.
684 @param[in] key key to search for
685 @param[in,out] arr start the search from this array; when this
686 method ends, *arr will point to the array in which the search
687 ended (in which the returned key_val resides)
688 @return If key was found: handle to the (updated) arr which contains the tuple
689 and pointer to the array cell with the tuple. Otherwise an empty handle, and
690 nullptr. */
691 std::pair<ut_lock_free_cnt_t::handle_t, key_val_t *> get_tuple(
692 uint64_t key, arr_node_t **arr) const {
693 for (;;) {
694 auto handle = (*arr)->begin_access();
695 if (!handle) {
696 /* The array has been garbaged, restart the search from the beginning.*/
697 *arr = m_data.load();
698 continue;
699 }
700
701 key_val_t *t = get_tuple_from_array((*arr)->m_base.get(),
702 (*arr)->m_n_base_elements, key);
703
704 if (t != nullptr) {
705 return {std::move(handle), t};
706 }
707
708 *arr = (*arr)->m_next.load();
709
710 if (*arr == nullptr) {
711 return {};
712 }
713 }
714 }
715
716 /** Insert the given key into a given array or return its cell if
717 already present.
718 @param[in] arr array into which to search and insert
719 @param[in] arr_size number of elements in the array
720 @param[in] key key to insert or whose cell to retrieve
721 @return a pointer to the inserted or previously existent tuple or NULL
722 if a tuple with this key is not present in the array and the array is
723 full, without any unused cells and thus insertion cannot be done into
724 it. */
725 key_val_t *insert_or_get_position_in_array(key_val_t *arr, size_t arr_size,
726 uint64_t key) {
727#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
728 ++m_n_search;
729#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
730
731 const size_t start = guess_position(key, arr_size);
732 const size_t end = start + arr_size;
733
734 for (size_t i = start; i < end; i++) {
735#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
736 ++m_n_search_iterations;
737#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
738
739 /* arr_size is a power of 2. */
740 const size_t cur_pos = i & (arr_size - 1);
741
742 key_val_t *cur_tuple = &arr[cur_pos];
743
744 const uint64_t cur_key = cur_tuple->m_key.load(std::memory_order_relaxed);
745
746 if (cur_key == key) {
747 return (cur_tuple);
748 }
749
750 if (cur_key == UNUSED) {
751 uint64_t expected = UNUSED;
752 if (cur_tuple->m_key.compare_exchange_strong(expected, key)) {
753 return (cur_tuple);
754 }
755
756 /* CAS failed, which means that some other
757 thread just changed the current key from UNUSED
758 to something else (which is stored in
759 'expected'). See if the new value is 'key'. */
760 if (expected == key) {
761 return (cur_tuple);
762 }
763
764 /* The current key, which was UNUSED, has been
765 replaced with something else (!= key) by a
766 concurrently executing insert. Keep searching
767 for a free slot. */
768 }
769
770 /* Skip through tuples with key == AVOID. */
771 }
772
773 return (nullptr);
774 }
775
776 /** Copy all used elements from one array to another. Flag the ones
777 in the old array as 'go to the next array'.
778 @param[in,out] src_arr array to copy from
779 @param[in,out] dst_arr array to copy to */
780 void copy_to_another_array(arr_node_t *src_arr, arr_node_t *dst_arr) {
781 for (size_t i = 0; i < src_arr->m_n_base_elements; i++) {
782 key_val_t *t = &src_arr->m_base[i];
783
784 uint64_t k = t->m_key.load(std::memory_order_relaxed);
785
786 /* Prevent further inserts into empty cells. */
787 if (k == UNUSED && t->m_key.compare_exchange_strong(k, AVOID)) {
788 continue;
789 }
790
791 int64_t v = t->m_val.load(std::memory_order_relaxed);
792
793 /* Insert (k, v) into the destination array. We know
794 that nobody else will try this concurrently with this
795 thread because:
796 * this code is being executed by just one thread (the
797 thread that managed to grow the list of arrays) and
798 * other normal inserts/updates with (key == k) will
799 pick the entry in src_arr. */
800
801 /* If we copied the tuple to dst_arr once then we must
802 keep trying to transfer its most recent value to
803 dst_arr, even if its value becomes DELETED. Otherwise
804 a delete operation on this tuple which executes
805 concurrently with the loop below may be lost. */
806 bool copied = false;
807
808 for (;;) {
809 if (v != DELETED || copied) {
810 insert_or_update(k, v, false, dst_arr, false);
811 copied = true;
812 }
813
814 /* Prevent any preceding memory operations (the
815 stores from insert_or_update() in particular)
816 to be reordered past the store from
817 m_val.compare_exchange_strong() below. We want
818 to be sure that if m_val is GOTO_NEXT_ARRAY,
819 then the entry is indeed present in some of the
820 next arrays (ie that insert_or_update() has
821 completed and that its effects are visible to
822 other threads). */
823
824 /* Now that we know (k, v) is present in some
825 of the next arrays, try to CAS the tuple
826 (k, v) to (k, GOTO_NEXT_ARRAY) in the current
827 array. */
828
829 if (t->m_val.compare_exchange_strong(v, GOTO_NEXT_ARRAY)) {
830 break;
831 }
832
833 /* If CAS fails, this means that m_val has been
834 changed in the meantime and the CAS will store
835 m_val's most recent value in 'v'. Retry both
836 operations (this time the insert_or_update()
837 call will be an update, rather than an
838 insert). */
839 }
840 }
841 }
842
843 /** Update the value of a given tuple.
844 @param[in,out] t tuple whose value to update
845 @param[in] val_to_set value to set or delta to apply
846 @param[in] is_delta if true then set the new value to
847 old + val, otherwise just set to val
848 @retval true update succeeded
849 @retval false update failed due to GOTO_NEXT_ARRAY
850 @return whether the update succeeded or not */
851 bool update_tuple(key_val_t *t, int64_t val_to_set, bool is_delta) {
852 int64_t cur_val = t->m_val.load(std::memory_order_relaxed);
853
854 for (;;) {
855 if (cur_val == GOTO_NEXT_ARRAY) {
856 return (false);
857 }
858
859 int64_t new_val;
860
861 if (is_delta && cur_val != NOT_FOUND && cur_val != DELETED) {
862 if (m_del_when_zero && cur_val + val_to_set == 0) {
863 new_val = DELETED;
864 } else {
865 new_val = cur_val + val_to_set;
866 }
867 } else {
868 new_val = val_to_set;
869 }
870
871 if (t->m_val.compare_exchange_strong(cur_val, new_val)) {
872 return (true);
873 }
874
875 /* When CAS fails it sets the most recent value of
876 m_val into cur_val. */
877 }
878 }
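/* Worked example of the retry loop above (numbers are hypothetical): with
m_val == 10, val_to_set == 1 and is_delta == true the thread attempts a CAS
from 10 to 11. If a concurrent inc() changed m_val to 11 first, the CAS fails
and stores 11 into cur_val, so the next iteration attempts 11 -> 12 and no
increment is lost. If m_val meanwhile became GOTO_NEXT_ARRAY, the loop reports
failure and the caller moves on to the next array. */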
879
880 /** Optimize the hash table. Called after a new array is appended to
881 the list of arrays (grow). Having more than one array in the list
882 of arrays works, but is suboptimal because searches
883 (for get/insert/update) have to search in more than one array. This
884 method starts from the head of the list and migrates all tuples from
885 this array to the next arrays. Then it removes the array from the
886 head of the list, waits for readers to go away and frees its m_base
887 member. */
888 void optimize() {
889 mutex_enter(&m_optimize_latch);
890
891 for (;;) {
892 arr_node_t *arr = m_data.load();
893
894 arr_node_t *next = arr->m_next.load();
895
896 if (next == nullptr) {
897 break;
898 }
899
900 /* begin_access() (ref counting) for 'arr' and 'next'
901 around copy_to_another_array() is not needed here
902 because the only code that frees memory is below,
903 serialized with a mutex. */
904
905 copy_to_another_array(arr, next);
906
907 arr->m_pending_free.store(true);
908
909 arr_node_t *expected = arr;
910
911 /* Detach 'arr' from the list. Ie move the head of the
912 list 'm_data' from 'arr' to 'arr->m_next'. */
913 ut_a(m_data.compare_exchange_strong(expected, next));
914
915 /* Spin/wait for all threads to stop looking at
916 this array. If at some point this turns out to be
917 sub-optimal (ie too long busy wait), then 'arr' could
918 be added to some lazy deletion list
919 arrays-awaiting-destruction-once-no-readers. */
920 arr->await_release_of_old_references();
921
922 arr->m_base.reset();
923
924 m_hollow_objects->push_back(arr);
925 }
926
927 mutex_exit(&m_optimize_latch);
928 }
929
930 /** Insert a new tuple or update an existing one. If a tuple with this
931 key does not exist then a new (key, val) tuple is inserted and is_delta
932 is ignored. If a tuple with this key exists and is_delta is true, then
933 the current value is changed to be current value + val, otherwise it
934 is overwritten to be val.
935 @param[in] key key to insert or whose value
936 to update
937 @param[in] val value to set; if the tuple
938 does not exist or if is_delta is false, then the new value is set
939 to val, otherwise it is set to old + val
940 @param[in] is_delta if true then set the new
941 value to old + val, otherwise just set to val.
942 @param[in] arr array to start the search from
943 @param[in] optimize_allowed if true then call optimize()
944 after an eventual grow(), if false, then never call optimize(). Used
945 to prevent recursive optimize() call by insert_or_update() ->
946 optimize() -> copy_to_another_array() -> insert_or_update() ->
947 optimize(). */
948 void insert_or_update(uint64_t key, int64_t val, bool is_delta,
949 arr_node_t *arr, bool optimize_allowed = true) {
950 bool call_optimize = false;
951
952 /* Loop through the arrays until we find a free slot to insert
953 or until we find a tuple with the specified key and manage to
954 update it. */
955 for (;;) {
956 auto handle = arr->begin_access();
957 if (!handle) {
958 /* The array has been garbaged, try the next one. */
959 arr = arr->m_next.load();
960 continue;
961 }
962
963 key_val_t *t = insert_or_get_position_in_array(
964 arr->m_base.get(), arr->m_n_base_elements, key);
965
966 /* t == NULL means that the array is full, must expand
967 and go to the next array. */
968
969 /* update_tuple() returning false means that the value
970 of the tuple is GOTO_NEXT_ARRAY, so we must go to the
971 next array. */
972
973 if (t != nullptr && update_tuple(t, val, is_delta)) {
974 break;
975 }
976
977 arr_node_t *next = arr->m_next.load();
978
979 if (next != nullptr) {
980 arr = next;
981 /* Prevent any subsequent memory operations
982 (the reads from the next array in particular)
983 to be reordered before the m_next.load()
984 above. */
985 continue;
986 }
987
988 bool grown_by_this_thread;
989
990 arr = arr->grow(DELETED, &grown_by_this_thread);
991
992 if (grown_by_this_thread) {
993 call_optimize = true;
994 }
995 }
996
997 if (optimize_allowed && call_optimize) {
998 optimize();
999 }
1000 }
1001
1002 /** Storage for the (key, val) tuples. */
1003 std::atomic<arr_node_t *> m_data;
1004
1005 typedef ut::allocator<arr_node_t *> hollow_alloc_t;
1006 typedef std::list<arr_node_t *, hollow_alloc_t> hollow_t;
1007
1008 /** Container for hollow (semi-destroyed) objects that have been
1009 removed from the list that starts at m_data. Those objects have their
1010 m_base member freed and are entirely destroyed at the end of the hash
1011 table life time. The access to this member is protected by
1012 m_optimize_latch (adding of new elements) and the removal of elements
1013 is done in the destructor of the hash table. */
1014 hollow_t *m_hollow_objects;
1015
1016 /** Concurrent copy-all-elements-to-the-next-array, removal of the
1017 head of the list and freeing of its m_base member are serialized with
1018 this latch. Those operations could be implemented without serialization,
1019 but this immensely increases the complexity of the code. Growing of the
1020 hash table is not a hot operation and thus we chose simplicity and
1021 maintainability instead of top performance in this case. "Get"
1022 operations and "insert/update" ones that do not cause grow still
1023 run concurrently even if this latch is locked. */
1024 ib_mutex_t m_optimize_latch;
1025
1026 /** True if a tuple should be automatically deleted from the hash
1027 if its value becomes 0 after an increment or decrement. */
1028 bool m_del_when_zero;
1029
1030#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
1031 /* The atomic type gives correct results, but has a _huge_
1032 performance impact. The unprotected operation gives a significant
1033 skew, but has almost no performance impact. */
1034
1035 /** Number of searches performed in this hash. */
1036 mutable std::atomic<uint64_t> m_n_search;
1037
1038 /** Number of elements processed for all searches. */
1039 mutable std::atomic<uint64_t> m_n_search_iterations;
1040#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
1041};
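/* A minimal usage sketch of ut_lock_free_hash_t. The names and values are
hypothetical, for illustration only; the initial size must be a nonzero power
of two, and with del_when_zero == true a tuple whose value reaches 0 through
inc()/dec() is deleted automatically. */
inline void ut_lock_free_hash_usage_sketch() {
  ut_lock_free_hash_t counts(1024, true);

  counts.inc(7);     /* inserts (7, 1) */
  counts.inc(7);     /* (7, 2) */
  counts.set(7, 40); /* overwrite */
  counts.dec(7);     /* (7, 39) */
  ut_ad(counts.get(7) == 39);

  counts.del(7);
  ut_ad(counts.get(7) == ut_hash_interface_t::NOT_FOUND);
}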
1042
1043#endif /* ut0lock_free_hash_h */