MySQL 8.2.0
Source Code Documentation
uniques.h
Go to the documentation of this file.
1/* Copyright (c) 2001, 2023, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23#ifndef UNIQUES_INCLUDED
24#define UNIQUES_INCLUDED
25
26#include <stddef.h>
27#include <sys/types.h>
28
29#include "my_dbug.h"
30#include "my_inttypes.h"
32#include "my_sys.h"
33#include "my_tree.h" // TREE
34#include "prealloced_array.h" // Prealloced_array
35#include "sql/sql_array.h"
36#include "sql/sql_sort.h" // IWYU pragma: keep
37
39struct TABLE;
40
41/**
42 Unique -- class for unique (removing of duplicates).
43 Puts all values to the TREE. If the tree becomes too big,
44 it's dumped to the file. User can request sorted values, or
45 just iterate through them. In the last case tree merging is performed in
46 memory simultaneously with iteration, so it should be ~2-3x faster.
47
48 Unique values can be read only from final result (not on insert) because
49 duplicate values can be contained in different dumped tree files.
50*/
51
52class Unique {
53 /// Array of file pointers
55 /// Max elements in memory buffer
57 /// Memory buffer size
59 /// Cache file for unique values retrieval of table read AM in executor
61 /// Tree to filter duplicates in memory
64 /// Flush tree to disk
65 bool flush();
66 /// Element size
67 uint size;
68
69 public:
70 ulong elements;
71 Unique(qsort2_cmp comp_func, void *comp_func_fixed_arg, uint size_arg,
72 ulonglong max_in_memory_size_arg);
73 ~Unique();
75
76 /**
77 Add new value to Unique
78
79 @details The value is inserted either to the tree, or to the duplicate
80 weedout table, depending on the mode of operation. If tree's mem buffer is
81 full, it's flushed to the disk.
82
83 @param ptr pointer to the binary string to insert
84
85 @returns
86 false error or duplicate
87 true the value was inserted
88 */
89 inline bool unique_add(void *ptr) {
91 DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
92 if (tree.elements_in_tree > max_elements && flush()) return true;
93 return !tree_insert(&tree, ptr, 0, tree.custom_arg);
94 }
95
96 bool get(TABLE *table);
97
98 static double get_use_cost(uint nkeys, uint key_size,
100 const Cost_model_table *cost_model);
101
102 void reset();
103 bool walk(tree_walk_action action, void *walk_action_arg);
104
105 uint get_size() const { return size; }
107 bool is_in_memory() { return elements == 0; }
108 friend int unique_write_to_file(void *v_key, element_count count,
109 void *unique);
110 friend int unique_write_to_ptrs(void *v_key, element_count count,
111 void *unique);
112};
113
114/**
115 Unique_on_insert -- similar to above, but rejects duplicates on insert, not
116 just on read of the final result.
117 To achieve this values are inserted into mem tmp table which uses index to
118 detect duplicate keys. When memory buffer is full, tmp table is dumped to a
119 disk-based tmp table.
120*/
121
123 /// Element size
124 uint m_size;
125 /// Duplicate weedout tmp table
126 TABLE *m_table{nullptr};
127
128 public:
129 Unique_on_insert(uint size) : m_size(size) {}
130 /**
131 Add row id to the filter
132
133 @param ptr pointer to the rowid
134
135 @returns
136 false rowid successfully inserted
137 true duplicate or error
138 */
139 bool unique_add(void *ptr);
140
141 /**
142 Initialize duplicate filter - allocate duplicate weedout tmp table
143
144 @returns
145 false initialization succeeded
146 true an error occur
147 */
148 bool init();
149
150 /**
151 Reset filter - drop all rowid records
152
153 @param reinit Whether to restart index scan
154 */
155 void reset(bool reinit);
156
157 /**
158 Cleanup unique filter
159 */
160 void cleanup();
161};
162
163#endif // UNIQUES_INCLUDED
API for getting cost estimates for operations on table data.
Definition: opt_costmodel.h:241
A typesafe replacement for DYNAMIC_ARRAY.
Definition: prealloced_array.h:70
Unique_on_insert – similar to above, but rejects duplicates on insert, not just on read of the final ...
Definition: uniques.h:122
uint m_size
Element size.
Definition: uniques.h:124
void cleanup()
Cleanup unique filter.
Definition: uniques.cc:1003
void reset(bool reinit)
Reset filter - drop all rowid records.
Definition: uniques.cc:988
Unique_on_insert(uint size)
Definition: uniques.h:129
bool init()
Initialize duplicate filter - allocate duplicate weedout tmp table.
Definition: uniques.cc:995
bool unique_add(void *ptr)
Add row id to the filter.
Definition: uniques.cc:970
TABLE * m_table
Duplicate weedout tmp table.
Definition: uniques.h:126
Unique – class for unique (removing of duplicates).
Definition: uniques.h:52
ulonglong max_in_memory_size
Memory buffer size.
Definition: uniques.h:58
friend int unique_write_to_file(void *v_key, element_count count, void *unique)
Definition: uniques.cc:333
uint get_size() const
Definition: uniques.h:105
ulong elements_in_tree()
Definition: uniques.h:74
bool unique_add(void *ptr)
Add new value to Unique.
Definition: uniques.h:89
bool flush()
Flush tree to disk.
Definition: uniques.cc:629
uchar * record_pointers
Definition: uniques.h:63
ulong max_elements
Max elements in memory buffer.
Definition: uniques.h:56
bool is_in_memory()
Definition: uniques.h:107
bool get(TABLE *table)
Definition: uniques.cc:891
ulong elements
Definition: uniques.h:70
Prealloced_array< Merge_chunk, 16 > file_ptrs
Array of file pointers.
Definition: uniques.h:54
friend int unique_write_to_ptrs(void *v_key, element_count count, void *unique)
Definition: uniques.cc:345
static double get_use_cost(uint nkeys, uint key_size, ulonglong max_in_memory_size, const Cost_model_table *cost_model)
Definition: uniques.cc:571
bool walk(tree_walk_action action, void *walk_action_arg)
Definition: uniques.cc:854
void reset()
Definition: uniques.cc:647
~Unique()
Definition: uniques.cc:623
IO_CACHE file
Cache file for unique values retrieval of table read AM in executor.
Definition: uniques.h:60
TREE tree
Tree to filter duplicates in memory.
Definition: uniques.h:62
ulonglong get_max_in_memory_size() const
Definition: uniques.h:106
Unique(qsort2_cmp comp_func, void *comp_func_fixed_arg, uint size_arg, ulonglong max_in_memory_size_arg)
Definition: uniques.cc:353
uint size
Element size.
Definition: uniques.h:67
int(* qsort2_cmp)(const void *, const void *, const void *)
Definition: my_sys.h:472
#define DBUG_PRINT(keyword, arglist)
Definition: my_dbug.h:180
#define DBUG_TRACE
Definition: my_dbug.h:145
Some integer typedefs for easier portability.
unsigned long long int ulonglong
Definition: my_inttypes.h:55
unsigned char uchar
Definition: my_inttypes.h:51
Some macros for dealing with pointer arithmetic, e.g., aligning of buffers to a given size.
Common header for many mysys elements.
TREE_ELEMENT * tree_insert(TREE *tree, void *key, uint key_size, const void *custom_arg)
Definition: tree.cc:200
int(* tree_walk_action)(void *, element_count, void *)
Definition: my_tree.h:52
uint32 element_count
Definition: my_tree.h:51
static int count
Definition: myisam_ftdump.cc:44
static PFS_engine_table_share_proxy table
Definition: pfs.cc:60
repeated Action action
Definition: replication_group_member_actions.proto:42
Definition: my_sys.h:345
Definition: table.h:1396
Definition: my_tree.h:67
uint elements_in_tree
Definition: my_tree.h:70
const void * custom_arg
Definition: my_tree.h:73