MySQL  8.0.18
Source Code Documentation
uniques.h
Go to the documentation of this file.
1 /* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License, version 2.0,
5  as published by the Free Software Foundation.
6 
7  This program is also distributed with certain software (including
8  but not limited to OpenSSL) that is licensed under separate terms,
9  as designated in a particular file or component or in included license
10  documentation. The authors of MySQL hereby grant you an additional
11  permission to link the program and your derivative works with the
12  separately licensed software that they have included with MySQL.
13 
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License, version 2.0, for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22 
23 #ifndef UNIQUES_INCLUDED
24 #define UNIQUES_INCLUDED
25 
26 #include <stddef.h>
27 #include <sys/types.h>
28 
29 #include "my_dbug.h"
30 #include "my_inttypes.h"
31 #include "my_pointer_arithmetic.h"
32 #include "my_sys.h"
33 #include "my_tree.h" // TREE
34 #include "prealloced_array.h" // Prealloced_array
35 #include "sql/sql_array.h"
36 #include "sql/sql_sort.h" // IWYU pragma: keep
37 
38 class Cost_model_table;
39 struct TABLE;
40 
41 /**
42  Unique -- class for unique (removing of duplicates).
43  Puts all values to the TREE. If the tree becomes too big,
44  it's dumped to the file. User can request sorted values, or
45  just iterate through them. In the last case tree merging is performed in
46  memory simultaneously with iteration, so it should be ~2-3x faster.
47 
48  Unique values can be read only from final result (not on insert) because
49  duplicate values can be contained in different dumped tree files.
50 */
51 
52 class Unique {
53  /// Array of file pointers
55  /// Max elements in memory buffer
57  /// Memory buffer size
59  /// Cache file for unique values retrieval fo table read AM in executor
61  /// Tree to filter duplicates in memory
64  /// Flush tree to disk
65  bool flush();
66  /// Element size
68 
69  public:
71  Unique(qsort2_cmp comp_func, void *comp_func_fixed_arg, uint size_arg,
72  ulonglong max_in_memory_size_arg);
73  ~Unique();
75 
76  /**
77  Add new value to Unique
78 
79  @details The value is inserted either to the tree, or to the duplicate
80  weedout table, depending on the mode of operation. If tree's mem buffer is
81  full, it's flushed to the disk.
82 
83  @param ptr pointer to the binary string to insert
84 
85  @returns
86  false error or duplicate
87  true the value was inserted
88  */
89  inline bool unique_add(void *ptr) {
90  DBUG_TRACE;
91  DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
92  if (tree.elements_in_tree > max_elements && flush()) return 1;
93  return !tree_insert(&tree, ptr, 0, tree.custom_arg);
94  }
95 
96  bool get(TABLE *table);
97 
99 
100  static double get_use_cost(Imerge_cost_buf_type buffer, uint nkeys,
102  const Cost_model_table *cost_model);
103 
104  // Returns the number of elements needed in Imerge_cost_buf_type.
105  inline static size_t get_cost_calc_buff_size(ulong nkeys, uint key_size,
107  ulonglong max_elems_in_tree =
108  (max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT) + key_size));
109  return 1 + static_cast<size_t>(nkeys / max_elems_in_tree);
110  }
111 
112  void reset();
113  bool walk(tree_walk_action action, void *walk_action_arg);
114 
115  uint get_size() const { return size; }
117  bool is_in_memory() { return elements == 0; }
118  friend int unique_write_to_file(void *v_key, element_count count,
119  void *unique);
120  friend int unique_write_to_ptrs(void *v_key, element_count count,
121  void *unique);
122 };
123 
124 /**
125  Unique_on_insert -- similar to above, but rejects duplicates on insert, not
126  just on read of the final result.
127  To achieve this values are inserted into mem tmp table which uses index to
128  detect duplicate keys. When memory buffer is full, tmp table is dumped to a
129  disk-based tmp table.
130 */
131 
133  /// Element size
135  /// Duplicate weedout tmp table
136  TABLE *m_table{nullptr};
137 
138  public:
139  Unique_on_insert(uint size) : m_size(size) {}
140  /**
141  Add row id to the filter
142 
143  @param ptr pointer to the rowid
144 
145  @returns
146  false rowid successfully inserted
147  true duplicate or error
148  */
149  bool unique_add(void *ptr);
150 
151  /**
152  Initialize duplicate filter - allocate duplicate weedout tmp table
153 
154  @returns
155  false initialization succeeded
156  true an error occur
157  */
158  bool init();
159 
160  /**
161  Reset filter - drop all rowid records
162 
163  @param reinit Whether to restart index scan
164  */
165  void reset(bool reinit);
166 
167  /**
168  Cleanup unique filter
169  */
170  void cleanup();
171 };
172 
173 #endif // UNIQUES_INCLUDED
uint get_size() const
Definition: uniques.h:115
static size_t get_cost_calc_buff_size(ulong nkeys, uint key_size, ulonglong max_in_memory_size)
Definition: uniques.h:105
Prealloced_array< Merge_chunk, 16 > file_ptrs
Array of file pointers.
Definition: uniques.h:54
unsigned long long int ulonglong
Definition: my_inttypes.h:55
unsigned char uchar
Definition: my_inttypes.h:51
ulong max_elements
Max elements in memory buffer.
Definition: uniques.h:56
friend int unique_write_to_ptrs(void *v_key, element_count count, void *unique)
Definition: uniques.cc:347
ssize_t count
Definition: memcached.c:386
void reset(bool reinit)
Reset filter - drop all rowid records.
Definition: uniques.cc:987
char buffer[STRING_BUFFER]
Definition: test_sql_9_sessions.cc:57
static double get_use_cost(Imerge_cost_buf_type buffer, uint nkeys, uint key_size, ulonglong max_in_memory_size, const Cost_model_table *cost_model)
Definition: uniques.cc:574
ulong elements_in_tree()
Definition: uniques.h:74
Some integer typedefs for easier portability.
bool init()
Initialize duplicate filter - allocate duplicate weedout tmp table.
Definition: uniques.cc:994
uint size
Element size.
Definition: uniques.h:67
Unique_on_insert(uint size)
Definition: uniques.h:139
Unique(qsort2_cmp comp_func, void *comp_func_fixed_arg, uint size_arg, ulonglong max_in_memory_size_arg)
Definition: uniques.cc:355
TREE tree
Tree to filter duplicates in memory.
Definition: uniques.h:62
ulong elements
Definition: uniques.h:70
int(* qsort2_cmp)(const void *, const void *, const void *)
Definition: my_sys.h:483
Unique – class for unique (removing of duplicates).
Definition: uniques.h:52
uchar * record_pointers
Definition: uniques.h:63
TREE_ELEMENT * tree_insert(TREE *tree, void *key, uint key_size, const void *custom_arg)
Definition: tree.cc:200
void cleanup()
Cleanup unique filter.
Definition: uniques.cc:1002
bool flush()
Flush tree to disk.
Definition: uniques.cc:632
Definition: table.h:1301
#define DBUG_PRINT(keyword, arglist)
Definition: my_dbug.h:179
uint32 element_count
Definition: my_tree.h:51
ulonglong get_max_in_memory_size() const
Definition: uniques.h:116
ulonglong max_in_memory_size
Memory buffer size.
Definition: uniques.h:58
~Unique()
Definition: uniques.cc:626
Definition: my_tree.h:67
Definition: my_sys.h:360
unsigned int uint
Definition: uca-dump.cc:29
bool unique_add(void *ptr)
Add new value to Unique.
Definition: uniques.h:89
bool is_in_memory()
Definition: uniques.h:117
Common header for many mysys elements.
DBUG_TRACE
Definition: do_ctype.cc:46
Some macros for dealing with pointer arithmetic, e.g., aligning of buffers to a given size...
Definition: my_tree.h:57
const void * custom_arg
Definition: my_tree.h:73
Unique_on_insert – similar to above, but rejects duplicates on insert, not just on read of the final...
Definition: uniques.h:132
int(* tree_walk_action)(void *, element_count, void *)
Definition: my_tree.h:52
void reset()
Definition: uniques.cc:650
bool walk(tree_walk_action action, void *walk_action_arg)
Definition: uniques.cc:857
Bounds_checked_array< uint > Imerge_cost_buf_type
Definition: uniques.h:98
TABLE * m_table
Duplicate weedout tmp table.
Definition: uniques.h:136
uint m_size
Element size.
Definition: uniques.h:134
friend int unique_write_to_file(void *v_key, element_count count, void *unique)
Definition: uniques.cc:335
API for getting cost estimates for operations on table data.
Definition: opt_costmodel.h:228
bool unique_add(void *ptr)
Add row id to the filter.
Definition: uniques.cc:971
uint elements_in_tree
Definition: my_tree.h:70
unsigned long ulong
Definition: my_inttypes.h:48
#define ALIGN_SIZE(A)
Definition: my_pointer_arithmetic.h:35
IO_CACHE file
Cache file for unique values retrieval fo table read AM in executor.
Definition: uniques.h:60