MySQL 9.1.0
Source Code Documentation
uniques.h
Go to the documentation of this file.
1/* Copyright (c) 2001, 2024, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24#ifndef UNIQUES_INCLUDED
25#define UNIQUES_INCLUDED
26
27#include <stddef.h>
28#include <sys/types.h>
29
30#include "my_dbug.h"
31#include "my_inttypes.h"
33#include "my_sys.h"
34#include "my_tree.h" // TREE
35#include "prealloced_array.h" // Prealloced_array
36#include "sql/sql_array.h"
37#include "sql/sql_sort.h" // IWYU pragma: keep
38
40struct TABLE;
41
42/**
43 Unique -- class for unique (removing of duplicates).
44 Puts all values to the TREE. If the tree becomes too big,
45 it's dumped to the file. User can request sorted values, or
46 just iterate through them. In the last case tree merging is performed in
47 memory simultaneously with iteration, so it should be ~2-3x faster.
48
49 Unique values can be read only from final result (not on insert) because
50 duplicate values can be contained in different dumped tree files.
51*/
52
53class Unique {
54 /// Array of file pointers
56 /// Max elements in memory buffer
58 /// Memory buffer size
60 /// Cache file for unique values retrieval of table read AM in executor
62 /// Tree to filter duplicates in memory
65 /// Flush tree to disk
66 bool flush();
67 /// Element size
68 uint size;
69
70 public:
71 ulong elements;
72 Unique(qsort2_cmp comp_func, void *comp_func_fixed_arg, uint size_arg,
73 ulonglong max_in_memory_size_arg);
74 ~Unique();
76
77 /**
78 Add new value to Unique
79
80 @details The value is inserted either to the tree, or to the duplicate
81 weedout table, depending on the mode of operation. If tree's mem buffer is
82 full, it's flushed to the disk.
83
84 @param ptr pointer to the binary string to insert
85
86 @returns
87 false error or duplicate
88 true the value was inserted
89 */
90 inline bool unique_add(void *ptr) {
92 DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
93 if (tree.elements_in_tree > max_elements && flush()) return true;
94 return !tree_insert(&tree, ptr, 0, tree.custom_arg);
95 }
96
97 bool get(TABLE *table);
98
99 static double get_use_cost(uint nkeys, uint key_size,
101 const Cost_model_table *cost_model);
102
103 void reset();
104 bool walk(tree_walk_action action, void *walk_action_arg);
105
106 uint get_size() const { return size; }
108 bool is_in_memory() { return elements == 0; }
109 friend int unique_write_to_file(void *v_key, element_count count,
110 void *unique);
111 friend int unique_write_to_ptrs(void *v_key, element_count count,
112 void *unique);
113};
114
115/**
116 Unique_on_insert -- similar to above, but rejects duplicates on insert, not
117 just on read of the final result.
118 To achieve this values are inserted into mem tmp table which uses index to
119 detect duplicate keys. When memory buffer is full, tmp table is dumped to a
120 disk-based tmp table.
121*/
122
124 /// Element size
125 uint m_size;
126 /// Duplicate weedout tmp table
127 TABLE *m_table{nullptr};
128
129 public:
131 /**
132 Add row id to the filter
133
134 @param ptr pointer to the rowid
135
136 @returns
137 false rowid successfully inserted
138 true duplicate or error
139 */
140 bool unique_add(void *ptr);
141
142 /**
143 Initialize duplicate filter - allocate duplicate weedout tmp table
144
145 @returns
146 false initialization succeeded
147 true an error occur
148 */
149 bool init();
150
151 /**
152 Reset filter - drop all rowid records
153
154 @param reinit Whether to restart index scan
155 */
156 void reset(bool reinit);
157
158 /**
159 Cleanup unique filter
160 */
161 void cleanup();
162};
163
164#endif // UNIQUES_INCLUDED
API for getting cost estimates for operations on table data.
Definition: opt_costmodel.h:242
A typesafe replacement for DYNAMIC_ARRAY.
Definition: prealloced_array.h:71
Unique_on_insert – similar to above, but rejects duplicates on insert, not just on read of the final ...
Definition: uniques.h:123
uint m_size
Element size.
Definition: uniques.h:125
void cleanup()
Cleanup unique filter.
Definition: uniques.cc:1004
void reset(bool reinit)
Reset filter - drop all rowid records.
Definition: uniques.cc:989
Unique_on_insert(uint size)
Definition: uniques.h:130
bool init()
Initialize duplicate filter - allocate duplicate weedout tmp table.
Definition: uniques.cc:996
bool unique_add(void *ptr)
Add row id to the filter.
Definition: uniques.cc:971
TABLE * m_table
Duplicate weedout tmp table.
Definition: uniques.h:127
Unique – class for unique (removing of duplicates).
Definition: uniques.h:53
ulonglong max_in_memory_size
Memory buffer size.
Definition: uniques.h:59
friend int unique_write_to_file(void *v_key, element_count count, void *unique)
Definition: uniques.cc:334
uint get_size() const
Definition: uniques.h:106
ulong elements_in_tree()
Definition: uniques.h:75
bool unique_add(void *ptr)
Add new value to Unique.
Definition: uniques.h:90
bool flush()
Flush tree to disk.
Definition: uniques.cc:630
uchar * record_pointers
Definition: uniques.h:64
ulong max_elements
Max elements in memory buffer.
Definition: uniques.h:57
bool is_in_memory()
Definition: uniques.h:108
bool get(TABLE *table)
Definition: uniques.cc:892
ulong elements
Definition: uniques.h:71
Prealloced_array< Merge_chunk, 16 > file_ptrs
Array of file pointers.
Definition: uniques.h:55
friend int unique_write_to_ptrs(void *v_key, element_count count, void *unique)
Definition: uniques.cc:346
static double get_use_cost(uint nkeys, uint key_size, ulonglong max_in_memory_size, const Cost_model_table *cost_model)
Definition: uniques.cc:572
bool walk(tree_walk_action action, void *walk_action_arg)
Definition: uniques.cc:855
void reset()
Definition: uniques.cc:648
~Unique()
Definition: uniques.cc:624
IO_CACHE file
Cache file for unique values retrieval of table read AM in executor.
Definition: uniques.h:61
TREE tree
Tree to filter duplicates in memory.
Definition: uniques.h:63
ulonglong get_max_in_memory_size() const
Definition: uniques.h:107
Unique(qsort2_cmp comp_func, void *comp_func_fixed_arg, uint size_arg, ulonglong max_in_memory_size_arg)
Definition: uniques.cc:354
uint size
Element size.
Definition: uniques.h:68
int(* qsort2_cmp)(const void *, const void *, const void *)
Definition: my_sys.h:461
#define DBUG_PRINT(keyword, arglist)
Definition: my_dbug.h:181
#define DBUG_TRACE
Definition: my_dbug.h:146
Some integer typedefs for easier portability.
unsigned long long int ulonglong
Definition: my_inttypes.h:56
unsigned char uchar
Definition: my_inttypes.h:52
Some macros for dealing with pointer arithmetic, e.g., aligning of buffers to a given size.
Common header for many mysys elements.
TREE_ELEMENT * tree_insert(TREE *tree, void *key, uint key_size, const void *custom_arg)
Definition: tree.cc:201
int(* tree_walk_action)(void *, element_count, void *)
Definition: my_tree.h:53
uint32 element_count
Definition: my_tree.h:52
static int count
Definition: myisam_ftdump.cc:45
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
size_t size(const char *const c)
Definition: base64.h:46
repeated Action action
Definition: replication_group_member_actions.proto:43
Definition: my_sys.h:334
Definition: table.h:1421
Definition: my_tree.h:68
uint elements_in_tree
Definition: my_tree.h:71
const void * custom_arg
Definition: my_tree.h:74