MySQL 9.1.0
Source Code Documentation
ut0lock_free_hash.h
1/*****************************************************************************
2
3Copyright (c) 2015, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ut0lock_free_hash.h
29 Lock free hash implementation
30
31 Created Mar 16, 2015 Vasil Dimov
32 *******************************************************/
33
34#ifndef ut0lock_free_hash_h
35#define ut0lock_free_hash_h
36
37#define BOOST_ATOMIC_NO_LIB
38
39#include "univ.i"
40
41#include <atomic>
42#include <list>
43
44#include "os0numa.h" /* os_getcpu() */
45#include "os0thread.h" /* ut::this_thread_hash */
46#include "ut0cpu_cache.h" /* Cache_aligned<T> */
47#include "ut0mutex.h" /* ib_mutex_t */
48#include "ut0new.h" /* UT_NEW*(), ut::delete_*() */
49#include "ut0rnd.h" /* ut::hash_uint64() */
50
51/** An interface class to a basic hash table, of which ut_lock_free_hash_t is an implementation. */
52class ut_hash_interface_t {
53 public:
54 /** The value that is returned when the searched for key is not
55 found. */
56 static const int64_t NOT_FOUND = INT64_MAX;
57
58 /** Destructor. */
59 virtual ~ut_hash_interface_t() = default;
60
61 /** Get the value mapped to a given key.
62 @param[in] key key to look for
63 @return the value that corresponds to key or NOT_FOUND. */
64 virtual int64_t get(uint64_t key) const = 0;
65
66 /** Set the value for a given key, either inserting a new (key, val)
67 tuple or overwriting an existent value.
68 @param[in] key key whose value to set
69 @param[in] val value to be set */
70 virtual void set(uint64_t key, int64_t val) = 0;
71
72 /** Delete a (key, val) pair from the hash.
73 @param[in] key key whose pair to delete */
74 virtual void del(uint64_t key) = 0;
75
76 /** Increment the value for a given key by 1 or insert a new tuple
77 (key, 1).
78 @param[in] key key whose value to increment or insert as 1 */
79 virtual void inc(uint64_t key) = 0;
80
81 /** Decrement the value of a given key by 1 or insert a new tuple
82 (key, -1).
83 @param[in] key key whose value to decrement */
84 virtual void dec(uint64_t key) = 0;
85
86#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
87 /** Print statistics about how many searches have been done on the hash
88 and how many collisions. */
89 virtual void print_stats() = 0;
90#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
91};
92
93/** Lock free ref counter. It uses a few counter variables internally to improve
94performance on machines with lots of CPUs. */
95class ut_lock_free_cnt_t {
96 public:
97 /** Constructor. */
98 ut_lock_free_cnt_t() {
99 /* The initial value of std::atomic depends on C++ standard and the way
100 the containing object was initialized, so make sure it's always zero. */
101 for (size_t i = 0; i < m_cnt.size(); i++) {
102 m_cnt[i].store(0);
103 }
104 }
105
106 class handle_t {
107 public:
108 handle_t() : m_counter{nullptr} {}
109
110 handle_t(std::atomic<uint64_t> *counter) : m_counter{counter} {
111 m_counter->fetch_add(1);
112 }
113
114 handle_t(handle_t &&other) noexcept : m_counter{other.m_counter} {
115 other.m_counter = nullptr;
116 }
117
118 explicit operator bool() const noexcept { return m_counter != nullptr; }
119
120 ~handle_t() {
121 if (m_counter != nullptr) {
122 m_counter->fetch_sub(1);
123 }
124 }
125
126 private:
127 std::atomic<uint64_t> *m_counter;
128 };
129
130 /** Increment the counter. */
131 handle_t reference() { return handle_t{&m_cnt[n_cnt_index()]}; }
132
133 /** Wait until all previously existing references get released.
134 This function assumes that the caller ensured that no new references
135 should appear (or rather: no long-lived references - there can be threads which
136 call reference(), realize the object should no longer be referenced and
137 immediately release it).
138 */
139 void await_release_of_old_references() const {
140 for (size_t i = 0; i < m_cnt.size(); i++) {
141 while (m_cnt[i].load()) {
142 std::this_thread::yield();
143 }
144 }
145 }
146
147 private:
148 /** Derive an appropriate index in m_cnt[] for the current thread.
149 @return index in m_cnt[] for this thread to use */
150 size_t n_cnt_index() const {
151 size_t cpu;
152
153#ifdef HAVE_OS_GETCPU
154 cpu = static_cast<size_t>(os_getcpu());
155#else /* HAVE_OS_GETCPU */
156 cpu = ut::this_thread_hash;
157#endif /* HAVE_OS_GETCPU */
158
159 return cpu % m_cnt.size();
160 }
161
162 /** The shards of the counter.
163 We've picked a number that is supposedly larger than the number of
164 CPUs on the system or close to it, but small enough that
165 await_release_of_old_references() finishes in reasonable time, and that the
166 size (256 * 64B = 16 KiB) is not too large.
167 We pad the atomics to avoid false sharing. In particular, we hope that on
168 platforms which HAVE_OS_GETCPU the same CPU will always fetch the same counter
169 and thus will store it in its local cache. This should also help on NUMA
170 architectures by avoiding the cost of synchronizing caches between CPUs.*/
171 std::array<ut::Cacheline_aligned<std::atomic<uint64_t>>, 256> m_cnt;
172};
173
174/** A node in a linked list of arrays. The pointer to the next node is
175atomically set (CAS) when a next element is allocated. */
176template <typename T>
177class ut_lock_free_list_node_t {
178 public:
179 typedef ut_lock_free_list_node_t<T> *next_t;
180
181 /** Constructor.
182 @param[in] n_elements number of elements to create */
183 explicit ut_lock_free_list_node_t(size_t n_elements)
184 : m_base{ut::make_unique<T[]>(
185 ut::make_psi_memory_key(mem_key_ut_lock_free_hash_t), n_elements)},
186 m_n_base_elements{n_elements},
187 m_pending_free{false},
188 m_next{nullptr} {
189 ut_ad(n_elements > 0);
190 }
191
192 static ut_lock_free_list_node_t *alloc(size_t n_elements) {
193 return ut::aligned_new_withkey<ut_lock_free_list_node_t<T>>(
194 ut::make_psi_memory_key(mem_key_ut_lock_free_hash_t),
195 alignof(ut_lock_free_list_node_t<T>), n_elements);
196 }
197
198 static void dealloc(ut_lock_free_list_node_t *ptr) {
199 ut::aligned_delete(ptr);
200 }
201
202 /** Create and append a new array to this one and store a pointer
203 to it in 'm_next'. This is done in a way that multiple threads can
204 attempt this at the same time and only one will succeed. When this
205 method returns, the caller can be sure that the job is done (either
206 by this or another thread).
207 @param[in] deleted_val the constant that designates that
208 a value is deleted
209 @param[out] grown_by_this_thread set to true if the next
210 array was created and appended by this thread; set to false if
211 created and appended by another thread.
212 @return the next array, appended by this or another thread */
213 next_t grow(int64_t deleted_val, bool *grown_by_this_thread) {
214 size_t new_size;
215
216 if (m_n_base_elements > 1024 &&
217 n_deleted(deleted_val) > m_n_base_elements * 3 / 4) {
218 /* If there are too many deleted elements (more than
219 75%), then do not double the size. */
220 new_size = m_n_base_elements;
221 } else {
222 new_size = m_n_base_elements * 2;
223 }
224
225 next_t new_arr = alloc(new_size);
226
227 /* Publish the allocated entry. If somebody did this in the
228 meantime then just discard the allocated entry and do
229 nothing. */
230 next_t expected = nullptr;
231 if (!m_next.compare_exchange_strong(expected, new_arr)) {
232 /* Somebody just did that. */
233 dealloc(new_arr);
234
235 /* 'expected' has the current value which
236 must be != NULL because the CAS failed. */
237 ut_ad(expected != nullptr);
238
239 *grown_by_this_thread = false;
240
241 return (expected);
242 } else {
243 *grown_by_this_thread = true;
244 return (new_arr);
245 }
246 }
247
248 /* This object is only ever destroyed after it is removed from the
249 list of arrays in the hash table (which means that new threads cannot
250 start using it) and the number of threads that use it has decreased
251 to zero. */
252
253 /** Mark the beginning of an access to this object. Used to prevent a
254 destruction of an array pointed by m_base while our thread is accessing it.
255 @return A handle which protects the m_base as long as the handle is not
256 destructed. If the handle is {} (==false), the access was denied, this object
257 is to be removed from the list and thus new access to it is not allowed.
258 The caller should retry from the head of the list. */
259 ut_lock_free_cnt_t::handle_t begin_access() {
260 auto handle = m_n_ref.reference();
261
262 if (m_pending_free.load()) {
263 /* Don't allow access if freeing is pending. Ie if
264 another thread is waiting for readers to go away
265 before it can free the m_base's member of this
266 object. */
267 return {};
268 }
269
270 return handle;
271 }
272
273 /** Wait until all previously held references are released */
274 void await_release_of_old_references() {
275 m_n_ref.await_release_of_old_references();
276 }
277
278 /** Base array. */
279 ut::unique_ptr<T[]> m_base;
280
281 /** Number of elements in 'm_base'. */
282 size_t m_n_base_elements;
283
284 /** Indicate whether some thread is waiting for readers to go away
285 before it can free the memory occupied by the m_base member. */
286 std::atomic_bool m_pending_free;
287
288 /** Pointer to the next node if any or NULL. This begins its life
289 as NULL and is only changed once to some real value. Never changed
290 to another value after that. */
291 std::atomic<next_t> m_next;
292
293 private:
294 /** Count the number of deleted elements. The value returned could
295 be inaccurate because it is obtained without any locks.
296 @param[in] deleted_val the constant that designates that
297 a value is deleted
298 @return the number of deleted elements */
299 size_t n_deleted(int64_t deleted_val) const {
300 size_t ret = 0;
301
302 for (size_t i = 0; i < m_n_base_elements; i++) {
303 const int64_t val = m_base[i].m_val.load(std::memory_order_relaxed);
304
305 if (val == deleted_val) {
306 ret++;
307 }
308 }
309
310 return (ret);
311 }
312
313 /** Counter for the current number of readers and writers to this
314 object. This object is destroyed only after it is removed from the
315 list, so that no new readers or writers may arrive, and after this
316 counter has dropped to zero. */
317 ut_lock_free_cnt_t m_n_ref;
318};
319
320/** Lock free hash table. It stores (key, value) pairs where both the key
321and the value are of integer type.
322* The possible keys are:
323 * UNUSED: a (key = UNUSED, val) tuple means that it is empty/unused. This is
324 the initial state of all keys.
325 * AVOID: a (key = AVOID, val = any) tuple means that this tuple is disabled,
326 does not contain real data and should be avoided by searches and
327 inserts. This is used when migrating all elements of an array to the
328 next array.
329 * real key: anything other than the above means this is a real key,
330 specified by the user.
331
332* The possible values are:
333 * NOT_FOUND: a (key, val = NOT_FOUND) tuple means that it is just being
334 inserted and returning "not found" is ok. This is the initial state of all
335 values.
336 * DELETED: a (key, val = DELETED) tuple means that a tuple with this key
337 existed before but was deleted at some point. Searches for this key return
338 "not found" and inserts reuse the tuple, replacing the DELETED value with
339 something else.
340 * GOTO_NEXT_ARRAY: a (key, val = GOTO_NEXT_ARRAY) tuple means that the
341 searches for this tuple (get and insert) should go to the next array and
342 repeat the search there. Used when migrating all tuples from one array to
343 the next.
344 * real value: anything other than the above means this is a real value,
345 specified by the user.
346
347* Transitions for keys (a real key is anything other than UNUSED and AVOID):
348 * UNUSED -> real key -- allowed
349 * UNUSED -> AVOID -- allowed
350 anything else is not allowed:
351 * real key -> UNUSED -- not allowed
352 * real key -> AVOID -- not allowed
353 * real key -> another real key -- not allowed
354 * AVOID -> UNUSED -- not allowed
355 * AVOID -> real key -- not allowed
356
357* Transitions for values (a real value is anything other than NOT_FOUND,
358 DELETED and GOTO_NEXT_ARRAY):
359 * NOT_FOUND -> real value -- allowed
360 * NOT_FOUND -> DELETED -- allowed
361 * real value -> another real value -- allowed
362 * real value -> DELETED -- allowed
363 * real value -> GOTO_NEXT_ARRAY -- allowed
364 * DELETED -> real value -- allowed
365 * DELETED -> GOTO_NEXT_ARRAY -- allowed
366 anything else is not allowed:
367 * NOT_FOUND -> GOTO_NEXT_ARRAY -- not allowed
368 * real value -> NOT_FOUND -- not allowed
369 * DELETED -> NOT_FOUND -- not allowed
370 * GOTO_NEXT_ARRAY -> real value -- not allowed
371 * GOTO_NEXT_ARRAY -> NOT_FOUND -- not allowed
372 * GOTO_NEXT_ARRAY -> DELETED -- not allowed
373*/
374class ut_lock_free_hash_t : public ut_hash_interface_t {
375 public:
376 /** Constructor. Not thread safe.
377 @param[in] initial_size number of elements to allocate
378 initially. Must be a power of 2, greater than 0.
379 @param[in] del_when_zero if true then automatically delete a
380 tuple from the hash if its value becomes zero due to an increment or
381 decrement. */
382 explicit ut_lock_free_hash_t(size_t initial_size, bool del_when_zero)
383 : m_del_when_zero(del_when_zero)
384#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
385 ,
386 m_n_search(0),
387 m_n_search_iterations(0)
388#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
389 {
390 ut_a(initial_size > 0);
391 ut_a(ut_is_2pow(initial_size));
392
393 m_data.store(arr_node_t::alloc(initial_size));
394
394
395 mutex_create(LATCH_ID_LOCK_FREE_HASH, &m_optimize_latch);
396
397 m_hollow_objects = ut::new_withkey<hollow_t>(
398 ut::make_psi_memory_key(mem_key_ut_lock_free_hash_t),
399 hollow_alloc_t(mem_key_ut_lock_free_hash_t));
400 }
401
402 /** Destructor. Not thread safe. */
403 ~ut_lock_free_hash_t() override {
404 mutex_destroy(&m_optimize_latch);
405
406 arr_node_t *arr = m_data.load();
407
408 do {
409 arr_node_t *next = arr->m_next.load();
410
410
411 arr_node_t::dealloc(arr);
412
413 arr = next;
414 } while (arr != nullptr);
415
416 while (!m_hollow_objects->empty()) {
417 arr_node_t::dealloc(m_hollow_objects->front());
418 m_hollow_objects->pop_front();
419 }
420 ut::delete_(m_hollow_objects);
421 }
422
423 /** Get the value mapped to a given key.
424 @param[in] key key to look for
425 @return the value that corresponds to key or NOT_FOUND. */
426 int64_t get(uint64_t key) const override {
427 ut_ad(key != UNUSED);
428 ut_ad(key != AVOID);
429
430 arr_node_t *arr = m_data.load();
431
432 for (;;) {
433 auto handle_and_tuple{get_tuple(key, &arr)};
434 const auto tuple = handle_and_tuple.second;
435
436 if (tuple == nullptr) {
437 return (NOT_FOUND);
438 }
439
440 /* Here if another thread is just setting this key
441 for the first time, then the tuple could be
442 (key, NOT_FOUND) (remember all vals are initialized
443 to NOT_FOUND initially) in which case we will return
444 NOT_FOUND below which is fine. */
445
446 int64_t v = tuple->m_val.load(std::memory_order_relaxed);
447
448 if (v == DELETED) {
449 return (NOT_FOUND);
450 } else if (v != GOTO_NEXT_ARRAY) {
451 return (v);
452 }
453
454 /* Prevent reorder of the below m_next.load() with
455 the above m_val.load().
456 We want to be sure that if m_val is GOTO_NEXT_ARRAY,
457 then the next array exists. */
458
459 arr = arr->m_next.load();
460 }
461 }
462
463 /** Set the value for a given key, either inserting a new (key, val)
464 tuple or overwriting an existent value. If two threads call this
465 method at the same time with the same key, but different val, then when
466 both methods have finished executing the value will be one of the
467 two, but it is nondeterministic which one. E.g.
468 Thread 1: set(key, val_a)
469 Thread 2: set(key, val_b)
470 when both have finished, then a tuple with the given key will be
471 present with value either val_a or val_b.
472 @param[in] key key whose value to set
473 @param[in] val value to be set */
474 void set(uint64_t key, int64_t val) override {
475 ut_ad(key != UNUSED);
476 ut_ad(key != AVOID);
477 ut_ad(val != NOT_FOUND);
478 ut_ad(val != DELETED);
479 ut_ad(val != GOTO_NEXT_ARRAY);
480
481 insert_or_update(key, val, false, m_data.load());
482 }
483
484 /** Delete a (key, val) pair from the hash.
485 If this gets called concurrently with get(), inc(), dec() or set(),
486 then to the caller it will look like the calls executed in isolation,
487 the hash structure itself will not be damaged, but it is undefined in
488 what order the calls will be executed. For example:
489 Let this tuple exist in the hash: (key == 5, val == 10)
490 Thread 1: inc(key == 5)
491 Thread 2: del(key == 5)
492 [1] If inc() executes first then the tuple will become
493 (key == 5, val == 11) and then del() will make it
494 (key == 5, val == DELETED), which get()s for key == 5 will return as
495 NOT_FOUND.
496 [2] If del() executes first then the tuple will become
497 (key == 5, val == DELETED) and then inc() will change it to
498 (key == 5, value == 1).
499 It is undefined which one of [1] or [2] will happen. It is up to the
500 caller to accept this behavior or prevent it at a higher level.
501 @param[in] key key whose pair to delete */
502 void del(uint64_t key) override {
503 ut_ad(key != UNUSED);
504 ut_ad(key != AVOID);
505
506 arr_node_t *arr = m_data.load();
507
508 for (;;) {
509 auto handle_and_tuple{get_tuple(key, &arr)};
510 const auto tuple = handle_and_tuple.second;
511
512 if (tuple == nullptr) {
513 /* Nothing to delete. */
514 return;
515 }
516
517 int64_t v = tuple->m_val.load(std::memory_order_relaxed);
518
519 for (;;) {
520 if (v == GOTO_NEXT_ARRAY) {
521 break;
522 }
523
524 if (tuple->m_val.compare_exchange_strong(v, DELETED)) {
525 return;
526 }
527
528 /* CAS stored the most recent value of 'm_val'
529 into 'v'. */
530 }
531
532 /* Prevent reorder of the below m_next.load() with the
533 above m_val.load() or the load from
534 m_val.compare_exchange_strong().
535 We want to be sure that if m_val is GOTO_NEXT_ARRAY,
536 then the next array exists. */
537
538 arr = arr->m_next.load();
539 }
540 }
541
542 /** Increment the value for a given key by 1 or insert a new tuple
543 (key, 1).
544 If two threads call this method at the same time with the same key,
545 then it is guaranteed that when both calls have finished, the value
546 will be incremented by 2.
547 If two threads call this method and set() at the same time with the
548 same key it is nondeterministic whether the value will be what was
549 given to set() or what was given to set() + 1. E.g.
550 Thread 1: set(key, val)
551 Thread 2: inc(key)
552 or
553 Thread 1: inc(key)
554 Thread 2: set(key, val)
555 when both have finished the value will be either val or val + 1.
556 @param[in] key key whose value to increment or insert as 1 */
557 void inc(uint64_t key) override {
558 ut_ad(key != UNUSED);
559 ut_ad(key != AVOID);
560
561 insert_or_update(key, 1, true, m_data.load());
562 }
563
564 /** Decrement the value of a given key by 1 or insert a new tuple
565 (key, -1).
566 With respect to calling this together with set(), inc() or dec() the
567 same applies as with inc(), see its comment. The only guarantee is
568 that the calls will execute in isolation, but the order in which they
569 will execute is nondeterministic.
570 @param[in] key key whose value to decrement */
571 void dec(uint64_t key) override {
572 ut_ad(key != UNUSED);
573 ut_ad(key != AVOID);
574
575 insert_or_update(key, -1, true, m_data.load());
576 }
577
578#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
579 /** Print statistics about how many searches have been done on the hash
580 and how many collisions. */
581 void print_stats() {
582 const ulint n_search = m_n_search;
583 const ulint n_search_iterations = m_n_search_iterations;
584
585 ib::info info(ER_IB_MSG_LOCK_FREE_HASH_USAGE_STATS);
586 info << "Lock free hash usage stats: number of searches=" << n_search
587 << ", number of search iterations=" << n_search_iterations;
588 if (n_search != 0) {
589 info << "average iterations per search: "
590 << (double)n_search_iterations / n_search;
591 }
592 }
593#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
594
595 private:
596 /** A key == UNUSED designates that this cell in the array is empty. */
597 static const uint64_t UNUSED = UINT64_MAX;
598
599 /** A key == AVOID designates an unusable cell. This cell of the array
600 has been empty (key == UNUSED), but was then marked as AVOID in order
601 to prevent new inserts into it. Searches should treat this like
602 UNUSED (ie if they encounter it before the key they are searching for
603 then stop the search and declare 'not found'). */
604 static const uint64_t AVOID = UNUSED - 1;
605
606 /** A val == DELETED designates that this cell in the array has been
607 used in the past, but it was deleted later. Searches should return
608 NOT_FOUND when they encounter it. */
609 static const int64_t DELETED = NOT_FOUND - 1;
610
611 /** A val == GOTO_NEXT_ARRAY designates that this tuple (key, whatever)
612 has been moved to the next array. The search for it should continue
613 there. */
614 static const int64_t GOTO_NEXT_ARRAY = DELETED - 1;
615
616 /** (key, val) tuple type. */
617 struct key_val_t {
618 key_val_t() : m_key(UNUSED), m_val(NOT_FOUND) {}
619
620 /** Key. */
621 std::atomic<uint64_t> m_key;
622
623 /** Value. */
624 std::atomic<int64_t> m_val;
625 };
626
627 /** An array node in the hash. The hash table consists of a linked
628 list of such nodes. */
629 typedef ut_lock_free_list_node_t<key_val_t> arr_node_t;
630
631 /** A hash function used to map a key to its suggested position in the
632 array. A linear search to the right is done after this position to find
633 the tuple with the given key or find a tuple with key == UNUSED or
634 AVOID which means that the key is not present in the array.
635 @param[in] key key to map into a position
636 @param[in] arr_size number of elements in the array
637 @return a position (index) in the array where the tuple is guessed
638 to be */
639 size_t guess_position(uint64_t key, size_t arr_size) const {
640 /* Implement a better hashing function to map
641 [0, UINT64_MAX] -> [0, arr_size - 1] if this one turns
642 out to generate too many collisions. */
643
644 /* arr_size is a power of 2. */
645 return (static_cast<size_t>(ut::hash_uint64(key) & (arr_size - 1)));
646 }
647
648 /** Get the array cell of a key from a given array.
649 @param[in] arr array to search into
650 @param[in] arr_size number of elements in the array
651 @param[in] key search for a tuple with this key
652 @return pointer to the array cell or NULL if not found */
653 key_val_t *get_tuple_from_array(key_val_t *arr, size_t arr_size,
654 uint64_t key) const {
655#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
656 ++m_n_search;
657#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
658
659 const size_t start = guess_position(key, arr_size);
660 const size_t end = start + arr_size;
661
662 for (size_t i = start; i < end; i++) {
663#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
664 ++m_n_search_iterations;
665#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
666
667 /* arr_size is a power of 2. */
668 const size_t cur_pos = i & (arr_size - 1);
669
670 key_val_t *cur_tuple = &arr[cur_pos];
671
672 const uint64_t cur_key = cur_tuple->m_key.load(std::memory_order_relaxed);
673
674 if (cur_key == key) {
675 return (cur_tuple);
676 } else if (cur_key == UNUSED || cur_key == AVOID) {
677 return (nullptr);
678 }
679 }
680
681 return (nullptr);
682 }
683
684 /** Get the array cell of a key.
685 @param[in] key key to search for
686 @param[in,out] arr start the search from this array; when this
687 method ends, *arr will point to the array in which the search
688 ended (in which the returned key_val resides)
689 @return If key was found: handle to the (updated) arr which contains the tuple
690 and pointer to the array cell with the tuple. Otherwise an empty handle, and
691 nullptr. */
692 std::pair<ut_lock_free_cnt_t::handle_t, key_val_t *> get_tuple(
693 uint64_t key, arr_node_t **arr) const {
694 for (;;) {
695 auto handle = (*arr)->begin_access();
696 if (!handle) {
697 /* The array has been garbaged, restart the search from the beginning.*/
698 *arr = m_data.load();
699 continue;
700 }
701
702 key_val_t *t = get_tuple_from_array((*arr)->m_base.get(),
703 (*arr)->m_n_base_elements, key);
704
705 if (t != nullptr) {
706 return {std::move(handle), t};
707 }
708
709 *arr = (*arr)->m_next.load();
710
711 if (*arr == nullptr) {
712 return {};
713 }
714 }
715 }
716
717 /** Insert the given key into a given array or return its cell if
718 already present.
719 @param[in] arr array into which to search and insert
720 @param[in] arr_size number of elements in the array
721 @param[in] key key to insert or whose cell to retrieve
722 @return a pointer to the inserted or previously existent tuple or NULL
723 if a tuple with this key is not present in the array and the array is
724 full, without any unused cells and thus insertion cannot be done into
725 it. */
726 key_val_t *insert_or_get_position_in_array(key_val_t *arr, size_t arr_size,
727 uint64_t key) {
728#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
729 ++m_n_search;
730#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
731
732 const size_t start = guess_position(key, arr_size);
733 const size_t end = start + arr_size;
734
735 for (size_t i = start; i < end; i++) {
736#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
737 ++m_n_search_iterations;
738#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
739
740 /* arr_size is a power of 2. */
741 const size_t cur_pos = i & (arr_size - 1);
742
743 key_val_t *cur_tuple = &arr[cur_pos];
744
745 const uint64_t cur_key = cur_tuple->m_key.load(std::memory_order_relaxed);
746
747 if (cur_key == key) {
748 return (cur_tuple);
749 }
750
751 if (cur_key == UNUSED) {
752 uint64_t expected = UNUSED;
753 if (cur_tuple->m_key.compare_exchange_strong(expected, key)) {
754 return (cur_tuple);
755 }
756
757 /* CAS failed, which means that some other
758 thread just changed the current key from UNUSED
759 to something else (which is stored in
760 'expected'). See if the new value is 'key'. */
761 if (expected == key) {
762 return (cur_tuple);
763 }
764
765 /* The current key, which was UNUSED, has been
766 replaced with something else (!= key) by a
767 concurrently executing insert. Keep searching
768 for a free slot. */
769 }
770
771 /* Skip through tuples with key == AVOID. */
772 }
773
774 return (nullptr);
775 }
776
777 /** Copy all used elements from one array to another. Flag the ones
778 in the old array as 'go to the next array'.
779 @param[in,out] src_arr array to copy from
780 @param[in,out] dst_arr array to copy to */
781 void copy_to_another_array(arr_node_t *src_arr, arr_node_t *dst_arr) {
782 for (size_t i = 0; i < src_arr->m_n_base_elements; i++) {
783 key_val_t *t = &src_arr->m_base[i];
784
785 uint64_t k = t->m_key.load(std::memory_order_relaxed);
786
787 /* Prevent further inserts into empty cells. */
788 if (k == UNUSED && t->m_key.compare_exchange_strong(k, AVOID)) {
789 continue;
790 }
791
792 int64_t v = t->m_val.load(std::memory_order_relaxed);
793
794 /* Insert (k, v) into the destination array. We know
795 that nobody else will try this concurrently with this
796 thread because:
797 * this code is being executed by just one thread (the
798 thread that managed to grow the list of arrays) and
799 * other normal inserts/updates with (key == k) will
800 pick the entry in src_arr. */
801
802 /* If we copied the tuple to dst_arr once then we must
803 keep trying to transfer its most recent value to
804 dst_arr, even if its value becomes DELETED. Otherwise
805 a delete operation on this tuple which executes
806 concurrently with the loop below may be lost. */
807 bool copied = false;
808
809 for (;;) {
810 if (v != DELETED || copied) {
811 insert_or_update(k, v, false, dst_arr, false);
812 copied = true;
813 }
814
815 /* Prevent any preceding memory operations (the
816 stores from insert_or_update() in particular)
817 to be reordered past the store from
818 m_val.compare_exchange_strong() below. We want
819 to be sure that if m_val is GOTO_NEXT_ARRAY,
820 then the entry is indeed present in some of the
821 next arrays (ie that insert_or_update() has
822 completed and that its effects are visible to
823 other threads). */
824
825 /* Now that we know (k, v) is present in some
826 of the next arrays, try to CAS the tuple
827 (k, v) to (k, GOTO_NEXT_ARRAY) in the current
828 array. */
829
830 if (t->m_val.compare_exchange_strong(v, GOTO_NEXT_ARRAY)) {
831 break;
832 }
833
834 /* If CAS fails, this means that m_val has been
835 changed in the meantime and the CAS will store
836 m_val's most recent value in 'v'. Retry both
837 operations (this time the insert_or_update()
838 call will be an update, rather than an
839 insert). */
840 }
841 }
842 }
843
844 /** Update the value of a given tuple.
845 @param[in,out] t tuple whose value to update
846 @param[in] val_to_set value to set or delta to apply
847 @param[in] is_delta if true then set the new value to
848 old + val, otherwise just set to val
849 @retval true update succeeded
850 @retval false update failed due to GOTO_NEXT_ARRAY
851 @return whether the update succeeded or not */
852 bool update_tuple(key_val_t *t, int64_t val_to_set, bool is_delta) {
853 int64_t cur_val = t->m_val.load(std::memory_order_relaxed);
854
855 for (;;) {
856 if (cur_val == GOTO_NEXT_ARRAY) {
857 return (false);
858 }
859
860 int64_t new_val;
861
862 if (is_delta && cur_val != NOT_FOUND && cur_val != DELETED) {
863 if (m_del_when_zero && cur_val + val_to_set == 0) {
864 new_val = DELETED;
865 } else {
866 new_val = cur_val + val_to_set;
867 }
868 } else {
869 new_val = val_to_set;
870 }
871
872 if (t->m_val.compare_exchange_strong(cur_val, new_val)) {
873 return (true);
874 }
875
876 /* When CAS fails it sets the most recent value of
877 m_val into cur_val. */
878 }
879 }
880
881 /** Optimize the hash table. Called after a new array is appended to
882 the list of arrays (grow). Having more than one array in the list
883 of arrays works, but is suboptimal because searches
884 (for get/insert/update) have to search in more than one array. This
885 method starts from the head of the list and migrates all tuples from
886 this array to the next arrays. Then it removes the array from the
887 head of the list, waits for readers to go away and frees its m_base
888 member. */
889 void optimize() {
890 mutex_enter(&m_optimize_latch);
891
892 for (;;) {
893 arr_node_t *arr = m_data.load();
894
895 arr_node_t *next = arr->m_next.load();
896
897 if (next == nullptr) {
898 break;
899 }
900
901 /* begin_access() (ref counting) for 'arr' and 'next'
902 around copy_to_another_array() is not needed here
903 because the only code that frees memory is below,
904 serialized with a mutex. */
905
906 copy_to_another_array(arr, next);
907
908 arr->m_pending_free.store(true);
909
910 arr_node_t *expected = arr;
911
912 /* Detach 'arr' from the list. Ie move the head of the
913 list 'm_data' from 'arr' to 'arr->m_next'. */
914 ut_a(m_data.compare_exchange_strong(expected, next));
915
916 /* Spin/wait for all threads to stop looking at
917 this array. If at some point this turns out to be
918 sub-optimal (ie too long busy wait), then 'arr' could
919 be added to some lazy deletion list
920 arrays-awaiting-destruction-once-no-readers. */
921 arr->await_release_of_old_references();
922
923 arr->m_base.reset();
924
925 m_hollow_objects->push_back(arr);
926 }
927
928 mutex_exit(&m_optimize_latch);
929 }
930
931 /** Insert a new tuple or update an existent one. If a tuple with this
932 key does not exist then a new one is inserted (key, val) and is_delta
933 is ignored. If a tuple with this key exists and is_delta is true, then
934 the current value is changed to be current value + val, otherwise it
935 is overwritten to be val.
936 @param[in] key key to insert or whose value
937 to update
938 @param[in] val value to set; if the tuple
939 does not exist or if is_delta is false, then the new value is set
940 to val, otherwise it is set to old + val
941 @param[in] is_delta if true then set the new
942 value to old + val, otherwise just set to val.
943 @param[in] arr array to start the search from
944 @param[in] optimize_allowed if true then call optimize()
945 after an eventual grow(), if false, then never call optimize(). Used
946 to prevent recursive optimize() call by insert_or_update() ->
947 optimize() -> copy_to_another_array() -> insert_or_update() ->
948 optimize(). */
949 void insert_or_update(uint64_t key, int64_t val, bool is_delta,
950 arr_node_t *arr, bool optimize_allowed = true) {
951 bool call_optimize = false;
952
953 /* Loop through the arrays until we find a free slot to insert
954 or until we find a tuple with the specified key and manage to
955 update it. */
956 for (;;) {
957 auto handle = arr->begin_access();
958 if (!handle) {
959 /* The array has been garbaged, try the next one. */
960 arr = arr->m_next.load();
961 continue;
962 }
963
964 key_val_t *t = insert_or_get_position_in_array(
965 arr->m_base.get(), arr->m_n_base_elements, key);
966
967 /* t == NULL means that the array is full, must expand
968 and go to the next array. */
969
970 /* update_tuple() returning false means that the value
971 of the tuple is GOTO_NEXT_ARRAY, so we must go to the
972 next array. */
973
974 if (t != nullptr && update_tuple(t, val, is_delta)) {
975 break;
976 }
977
978 arr_node_t *next = arr->m_next.load();
979
980 if (next != nullptr) {
981 arr = next;
982 /* Prevent any subsequent memory operations
983 (the reads from the next array in particular)
984 to be reordered before the m_next.load()
985 above. */
986 continue;
987 }
988
989 bool grown_by_this_thread;
990
991 arr = arr->grow(DELETED, &grown_by_this_thread);
992
993 if (grown_by_this_thread) {
994 call_optimize = true;
995 }
996 }
997
998 if (optimize_allowed && call_optimize) {
999 optimize();
1000 }
1001 }
1002
1003 /** Storage for the (key, val) tuples. */
1004 std::atomic<arr_node_t *> m_data;
1005
1006 typedef ut::allocator<arr_node_t *> hollow_alloc_t;
1007 typedef std::list<arr_node_t *, hollow_alloc_t> hollow_t;
1008
1009 /** Container for hollow (semi-destroyed) objects that have been
1010 removed from the list that starts at m_data. Those objects have their
1011 m_base member freed and are entirely destroyed at the end of the hash
1012 table life time. The access to this member is protected by
1013 m_optimize_latch (adding of new elements) and the removal of elements
1014 is done in the destructor of the hash table. */
1015 hollow_t *m_hollow_objects;
1016
1017 /** Concurrent copy-all-elements-to-the-next-array, removal of the
1018 head of the list and freeing of its m_base member are serialized with
1019 this latch. Those operations could be implemented without serialization,
1020 but this immensely increases the complexity of the code. Growing of the
1021 hash table is not a hot operation and thus we chose simplicity and
1022 maintainability instead of top performance in this case. "Get"
1023 operations and "insert/update" ones that do not cause grow still
1024 run concurrently even if this latch is locked. */
1025 ib_mutex_t m_optimize_latch;
1026
1027 /** True if a tuple should be automatically deleted from the hash
1028 if its value becomes 0 after an increment or decrement. */
1029 bool m_del_when_zero;
1030
1031#ifdef UT_HASH_IMPLEMENT_PRINT_STATS
1032 /* The atomic type gives correct results, but has a _huge_
1033 performance impact. The unprotected operation gives a significant
1034 skew, but has almost no performance impact. */
1035
1036 /** Number of searches performed in this hash. */
1037 mutable std::atomic<uint64_t> m_n_search;
1038
1039 /** Number of elements processed for all searches. */
1040 mutable std::atomic<uint64_t> m_n_search_iterations;
1041#endif /* UT_HASH_IMPLEMENT_PRINT_STATS */
1042};
1043
1044#endif /* ut0lock_free_hash_h */
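For orientation, here is a hedged sketch of how the table above is meant to be used by client code. The initial size 1024, the key 42 and the function name are illustrative assumptions, not values taken from this header.

/* Illustrative only: assumes the usual InnoDB ut:: environment is available,
since ut_lock_free_hash_t relies on it for allocation and latching. */
static void example_lock_free_hash_usage() {
  /* 1024 initial slots (must be a power of 2); 'true' asks the table to
  delete a tuple automatically when inc()/dec() bring its value to zero. */
  ut_lock_free_hash_t counts{1024, true};

  counts.set(42, 7); /* (42, 7) */
  counts.inc(42);    /* (42, 8) */
  counts.dec(42);    /* (42, 7) */

  if (counts.get(42) != ut_hash_interface_t::NOT_FOUND) {
    /* The key is present; get() returned its current value. */
  }

  counts.del(42); /* a subsequent get(42) returns NOT_FOUND */
}

All of these calls except the constructor and destructor may be issued concurrently from multiple threads; the class documentation above spells out which interleavings are deterministic and which are not.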