MySQL 9.1.0
Source Code Documentation
value_map.h
Go to the documentation of this file.
1#ifndef HISTOGRAMS_VALUE_MAP_INCLUDED
2#define HISTOGRAMS_VALUE_MAP_INCLUDED
3
4/* Copyright (c) 2017, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file sql/histograms/value_map.h
29*/
30
31#include <stddef.h>
32#include <functional> // std::less
33#include <map>
34#include <string>
35#include <utility> // std::pair
36
37#include "my_alloc.h"
38#include "my_base.h" // ha_rows
40#include "mysql_time.h"
42
43class String;
44class my_decimal;
45template <class T>
47
48namespace histograms {
49
50class Histogram;
51template <class T>
52struct SingletonBucket;
53
54namespace equi_height {
55template <class T>
56class Bucket;
57} // namespace equi_height
58
59/**
60 The maximum number of characters to evaluate when building histograms. For
61 binary/blob values, this is the number of bytes to consider.
62*/
63static const size_t HISTOGRAM_MAX_COMPARE_LENGTH = 42;
64
65/**
66 Histogram comparator.
67
68 Typical usage is in a "value map", where we for instance need to sort based
69 on string collation and similar.
70*/
72 public:
73 /**
74 Overload operator(), so that we can use this struct as a custom comparator
75 in std classes/functions.
76
77 @param lhs first value to compare
78 @param rhs second value to compare
79
80 @return true if lhs is considered to be smaller/less than rhs. false
81 otherwise.
82 */
83 template <class T>
84 bool operator()(const T &lhs, const T &rhs) const {
85 return std::less<T>()(lhs, rhs);
86 }
87
88 /**
89 Used by std::lower_bound when computing equal-to and less-than selectivity
90 to find the first bucket with an upper bound that is not less than b.
91 */
92 template <class T>
93 bool operator()(const equi_height::Bucket<T> &a, const T &b) const {
95 }
96
97 /**
98 * Same as above, but for singleton histogram buckets.
99 */
100 template <class T>
101 bool operator()(const SingletonBucket<T> &a, const T &b) const {
102 return Histogram_comparator()(a.value, b);
103 }
104
105 /**
106 Used by std::upper_bound when computing greater-than selectivity in order to
107 find the first bucket with an upper bound that is greater than a.
108 Notice that the comparison function used by std::lower_bound and
109 std::upper_bound have the collection element as the first and second
110 argument, respectively.
111 */
112 template <class T>
113 bool operator()(const T &a, const equi_height::Bucket<T> &b) const {
115 }
116
117 /**
118 * Same as above, but for singleton histogram buckets.
119 */
120 template <class T>
121 bool operator()(const T &a, const SingletonBucket<T> &b) const {
122 return Histogram_comparator()(a, b.value);
123 }
124
125 /**
126 Used by std::is_sorted to verify that equi-height histogram buckets are
127 stored in sorted order. We consider bucket a = [a1, a2] to be less than
128 bucket b = [b1, b2] if a2 < b1.
129 */
130 template <class T>
132 const equi_height::Bucket<T> &b) const {
135 }
136
137 /**
138 * Same as above, but for singleton histogram buckets.
139 */
140 template <class T>
142 const SingletonBucket<T> &b) const {
143 return Histogram_comparator()(a.value, b.value);
144 }
145};
146
147/**
148 The abstract base class for all Value_map types.
149
150 We would ideally like to have only one class for the Value map concept (no
151 inheritance) which would gives us an easier interface. But there are some
152 reasons for why we need to to split the class into a non-templated base class
153 and a templated subclass:
154
155 - We are collecting Value_maps in a collection/vector where they have a
156 different template type. This cannot be achieved unless we have a
157 non-templated base class.
158
159 - When working on a collection of Value_maps, it is more convenient to declare
160 the interface in the base class (Value_map_base) so that we don't need to
161 do a cast to the subclass in order to get hold of the methods we want to
162 use.
163
164 - Value_map_base::add_values and Value_map::add_values looks like the same
165 function, but they are not. Value_map_base::add_values is a small functions
166 that helps us cast the Value_map<T> to the correct type (for instance
167 Value_map<longlong>). Ideally, this function would have been pure virtual,
168 but it's not possible to have virtual member function templates.
169*/
171 private:
176
177 protected:
179
180 public:
182
183 virtual ~Value_map_base() = default;
184
185 /**
186 Returns the number of [value, count] pairs in the Value_map.
187
188 @return The number of values in the Value_map.
189 */
190 virtual size_t size() const = 0;
191
192 /**
193 Add a value with the given count to this Value_map. If the given value
194 already exists, the count will be added to the existing count.
195
196 @param value The value to add.
197 @param count Count of the value to add.
198
199 @return false on success, and true in case of errors (OOM).
200 */
201 template <class T>
202 bool add_values(const T &value, const ha_rows count);
203
204 /**
205 Increase the number of null values with the given count.
206
207 @param count The number of null values.
208 */
210
211 /// @return The number of null values in this Value_map.
213
214 /**
215 Create a Histogram from this Value_map.
216
217 The resulting histogram will have at most "num_buckets" buckets (might be
218 less), and all of its contents will be allocated on the supplied MEM_ROOT.
219
220 @param mem_root The MEM_ROOT to allocate the contents on
221 @param num_buckets Maximum number of buckets to create
222 @param db_name Database name
223 @param tbl_name Table name
224 @param col_name Column name
225
226 @return nullptr on error, or a valid histogram if success.
227 */
228 virtual Histogram *build_histogram(MEM_ROOT *mem_root, size_t num_buckets,
229 const std::string &db_name,
230 const std::string &tbl_name,
231 const std::string &col_name) const = 0;
232
233 /// @return The sampling rate that was used to generate this Value_map.
234 double get_sampling_rate() const { return m_sampling_rate; }
235
236 /**
237 Set the sampling rate that was used to generate this Value_map.
238
239 @param sampling_rate The sampling rate.
240 */
241 void set_sampling_rate(double sampling_rate) {
242 m_sampling_rate = sampling_rate;
243 }
244
245 /// @return the character set for the data this Value_map contains
246 const CHARSET_INFO *get_character_set() const { return m_charset; }
247
248 /// @return the data type that this Value_map contains
250
251 /// @return the overhead in bytes for each distinct value stored in the
252 /// Value_map.
253 virtual size_t element_overhead() const = 0;
254};
255
256/**
257 Value_map class.
258
259 This class works as a map. It is a collection of [key, count], where "count"
260 is the number of occurrences of "key". The class abstracts away things like
261 duplicate checking and the underlying container.
262*/
263template <class T>
264class Value_map final : public Value_map_base {
265 private:
269
271
272 public:
274 : Value_map_base(charset, data_type),
275 m_value_map(typename value_map_type::allocator_type(&m_mem_root)) {}
276
277 size_t size() const override { return m_value_map.size(); }
278
279 typename value_map_type::const_iterator begin() const {
280 return m_value_map.cbegin();
281 }
282
283 typename value_map_type::const_iterator end() const {
284 return m_value_map.cend();
285 }
286
287 bool add_values(const T &value, const ha_rows count);
288
289 /**
290 Insert a range of values into the Value_map.
291
292 Values in the range (begin, end] must be sorted according to
293 Histogram_comparator. Note that this function is currently only used in
294 unit testing.
295
296 @note The value map must be empty before calling this function.
297
298 @param begin Iterator that points to the beginning of the range.
299 @param end Iterator that points to the end of the range.
300
301 @return false on success, true on error (OOM or similar).
302 */
303 bool insert(typename value_map_type::const_iterator begin,
304 typename value_map_type::const_iterator end);
305
306 Histogram *build_histogram(MEM_ROOT *mem_root, size_t num_buckets,
307 const std::string &db_name,
308 const std::string &tbl_name,
309 const std::string &col_name) const override;
310
311 /// @return the overhead in bytes for each distinct value stored in the
312 /// Value_map. The value 32 is obtained from both GCC 8.2 and
313 /// Clang 8.0 (same as sizeof(value_map_type::node_type) in C++17).
314 size_t element_overhead() const override {
315 // TODO: Replace this with sizeof(value_map_type::node_type) when we have
316 // full C++17 support.
317 return sizeof(typename value_map_type::value_type) +
318 sizeof(typename value_map_type::key_type) + 32;
319 }
320};
321
322// Explicit template instantiations.
323template <>
324bool Histogram_comparator::operator()(const String &, const String &) const;
325
326template <>
328 const MYSQL_TIME &) const;
329
330template <>
332 const my_decimal &) const;
333
334} // namespace histograms
335
336#endif
Mem_root_allocator is a C++ STL memory allocator based on MEM_ROOT.
Definition: mem_root_allocator.h:68
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:167
Histogram base class.
Definition: histogram.h:314
The abstract base class for all Value_map types.
Definition: value_map.h:170
virtual Histogram * build_histogram(MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name) const =0
Create a Histogram from this Value_map.
virtual size_t size() const =0
Returns the number of [value, count] pairs in the Value_map.
const CHARSET_INFO * m_charset
Definition: value_map.h:173
virtual ~Value_map_base()=default
bool add_values(const T &value, const ha_rows count)
Add a value with the given count to this Value_map.
Definition: value_map.cc:87
double get_sampling_rate() const
Definition: value_map.h:234
void add_null_values(const ha_rows count)
Increase the number of null values with the given count.
Definition: value_map.h:209
Value_map_type get_data_type() const
Definition: value_map.h:249
virtual size_t element_overhead() const =0
ha_rows m_num_null_values
Definition: value_map.h:174
MEM_ROOT m_mem_root
Definition: value_map.h:178
double m_sampling_rate
Definition: value_map.h:172
Value_map_base(const CHARSET_INFO *charset, Value_map_type data_type)
Definition: value_map.cc:78
void set_sampling_rate(double sampling_rate)
Set the sampling rate that was used to generate this Value_map.
Definition: value_map.h:241
const CHARSET_INFO * get_character_set() const
Definition: value_map.h:246
ha_rows get_num_null_values() const
Definition: value_map.h:212
const Value_map_type m_data_type
Definition: value_map.h:175
Value_map class.
Definition: value_map.h:264
value_map_type::const_iterator end() const
Definition: value_map.h:283
size_t element_overhead() const override
Definition: value_map.h:314
bool insert(typename value_map_type::const_iterator begin, typename value_map_type::const_iterator end)
Insert a range of values into the Value_map.
Definition: value_map.cc:137
bool add_values(const T &value, const ha_rows count)
Definition: value_map.cc:93
size_t size() const override
Returns the number of [value, count] pairs in the Value_map.
Definition: value_map.h:277
Histogram * build_histogram(MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name) const override
Create a Histogram from this Value_map.
Definition: value_map.cc:151
std::map< T, ha_rows, Histogram_comparator, Mem_root_allocator< std::pair< const T, ha_rows > > > value_map_type
Definition: value_map.h:268
value_map_type m_value_map
Definition: value_map.h:270
value_map_type::const_iterator begin() const
Definition: value_map.h:279
Value_map(const CHARSET_INFO *charset, Value_map_type data_type)
Definition: value_map.h:273
Equi-height bucket.
Definition: equi_height_bucket.h:54
const T & get_upper_inclusive() const
Definition: equi_height_bucket.h:104
const T & get_lower_inclusive() const
Definition: equi_height_bucket.h:99
my_decimal class limits 'decimal_t' type to what we need in MySQL.
Definition: my_decimal.h:96
static MEM_ROOT mem_root
Definition: client_plugin.cc:114
A better implementation of the UNIX ctype(3) library.
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1141
static int count
Definition: myisam_ftdump.cc:45
Time declarations shared between the server and client API: you should not add anything to this heade...
uint16_t value_type
Definition: vt100.h:184
const std::string charset("charset")
Definition: column_statistics.h:34
static const size_t HISTOGRAM_MAX_COMPARE_LENGTH
The maximum number of characters to evaluate when building histograms.
Definition: value_map.h:63
Value_map_type
Datatypes that a Value_map and histogram can hold (including the invalid type).
Definition: value_map_type.h:33
int key_type
Definition: method.h:38
const char * db_name
Definition: rules_table_service.cc:55
std::map< Key, Value, Compare, ut::allocator< std::pair< const Key, Value > > > map
Specialization of map which uses ut_allocator.
Definition: ut0new.h:2894
Definition: m_ctype.h:421
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:83
Definition: mysql_time.h:82
Definition: completion_hash.h:40
Histogram comparator.
Definition: value_map.h:71
bool operator()(const SingletonBucket< T > &a, const SingletonBucket< T > &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:141
bool operator()(const equi_height::Bucket< T > &a, const T &b) const
Used by std::lower_bound when computing equal-to and less-than selectivity to find the first bucket w...
Definition: value_map.h:93
bool operator()(const T &lhs, const T &rhs) const
Overload operator(), so that we can use this struct as a custom comparator in std classes/functions.
Definition: value_map.h:84
bool operator()(const T &a, const equi_height::Bucket< T > &b) const
Used by std::upper_bound when computing greater-than selectivity in order to find the first bucket wi...
Definition: value_map.h:113
bool operator()(const T &a, const SingletonBucket< T > &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:121
bool operator()(const SingletonBucket< T > &a, const T &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:101
bool operator()(const equi_height::Bucket< T > &a, const equi_height::Bucket< T > &b) const
Used by std::is_sorted to verify that equi-height histogram buckets are stored in sorted order.
Definition: value_map.h:131
Definition: singleton.h:107
T value
Definition: singleton.h:108