MySQL 9.5.0
Source Code Documentation
value_map.h
Go to the documentation of this file.
1#ifndef HISTOGRAMS_VALUE_MAP_INCLUDED
2#define HISTOGRAMS_VALUE_MAP_INCLUDED
3
4/* Copyright (c) 2017, 2025, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file sql/histograms/value_map.h
29*/
30
31#include <stddef.h>
32#include <functional> // std::less
33#include <map>
34#include <string>
35#include <utility> // std::pair
36
37#include "my_alloc.h"
38#include "my_base.h" // ha_rows
39#include "my_temporal.h"
41#include "mysql_time.h"
43
44class String;
45class my_decimal;
46template <class T>
48
49namespace histograms {
50
51class Histogram;
52template <class T>
53struct SingletonBucket;
54
55namespace equi_height {
56template <class T>
57class Bucket;
58} // namespace equi_height
59
60/**
61 The maximum number of characters to evaluate when building histograms. For
62 binary/blob values, this is the number of bytes to consider.
63*/
64static const size_t HISTOGRAM_MAX_COMPARE_LENGTH = 42;
65
66/**
67 Histogram comparator.
68
69 Typical usage is in a "value map", where we for instance need to sort based
70 on string collation and similar.
71*/
73 public:
74 /**
75 Overload operator(), so that we can use this struct as a custom comparator
76 in std classes/functions.
77
78 @param lhs first value to compare
79 @param rhs second value to compare
80
81 @return true if lhs is considered to be smaller/less than rhs. false
82 otherwise.
83 */
84 template <class T>
85 bool operator()(const T &lhs, const T &rhs) const {
86 return std::less<T>()(lhs, rhs);
87 }
88
89 /**
90 Used by std::lower_bound when computing equal-to and less-than selectivity
91 to find the first bucket with an upper bound that is not less than b.
92 */
93 template <class T>
94 bool operator()(const equi_height::Bucket<T> &a, const T &b) const {
96 }
97
98 /**
99 * Same as above, but for singleton histogram buckets.
100 */
101 template <class T>
102 bool operator()(const SingletonBucket<T> &a, const T &b) const {
103 return Histogram_comparator()(a.value, b);
104 }
105
106 /**
107 Used by std::upper_bound when computing greater-than selectivity in order to
108 find the first bucket with an upper bound that is greater than a.
109 Notice that the comparison function used by std::lower_bound and
110 std::upper_bound have the collection element as the first and second
111 argument, respectively.
112 */
113 template <class T>
114 bool operator()(const T &a, const equi_height::Bucket<T> &b) const {
116 }
117
118 /**
119 * Same as above, but for singleton histogram buckets.
120 */
121 template <class T>
122 bool operator()(const T &a, const SingletonBucket<T> &b) const {
123 return Histogram_comparator()(a, b.value);
124 }
125
126 /**
127 Used by std::is_sorted to verify that equi-height histogram buckets are
128 stored in sorted order. We consider bucket a = [a1, a2] to be less than
129 bucket b = [b1, b2] if a2 < b1.
130 */
131 template <class T>
133 const equi_height::Bucket<T> &b) const {
136 }
137
138 /**
139 * Same as above, but for singleton histogram buckets.
140 */
141 template <class T>
143 const SingletonBucket<T> &b) const {
144 return Histogram_comparator()(a.value, b.value);
145 }
146};
147
148/**
149 The abstract base class for all Value_map types.
150
151 We would ideally like to have only one class for the Value map concept (no
152 inheritance) which would gives us an easier interface. But there are some
153 reasons for why we need to to split the class into a non-templated base class
154 and a templated subclass:
155
156 - We are collecting Value_maps in a collection/vector where they have a
157 different template type. This cannot be achieved unless we have a
158 non-templated base class.
159
160 - When working on a collection of Value_maps, it is more convenient to declare
161 the interface in the base class (Value_map_base) so that we don't need to
162 do a cast to the subclass in order to get hold of the methods we want to
163 use.
164
165 - Value_map_base::add_values and Value_map::add_values looks like the same
166 function, but they are not. Value_map_base::add_values is a small functions
167 that helps us cast the Value_map<T> to the correct type (for instance
168 Value_map<longlong>). Ideally, this function would have been pure virtual,
169 but it's not possible to have virtual member function templates.
170*/
172 private:
177
178 protected:
180
181 public:
183
184 virtual ~Value_map_base() = default;
185
186 /**
187 Returns the number of [value, count] pairs in the Value_map.
188
189 @return The number of values in the Value_map.
190 */
191 virtual size_t size() const = 0;
192
193 /**
194 Add a value with the given count to this Value_map. If the given value
195 already exists, the count will be added to the existing count.
196
197 @param value The value to add.
198 @param count Count of the value to add.
199
200 @return false on success, and true in case of errors (OOM).
201 */
202 template <class T>
203 bool add_values(const T &value, const ha_rows count);
204
205 /**
206 Increase the number of null values with the given count.
207
208 @param count The number of null values.
209 */
211
212 /// @return The number of null values in this Value_map.
214
215 /**
216 Create a Histogram from this Value_map.
217
218 The resulting histogram will have at most "num_buckets" buckets (might be
219 less), and all of its contents will be allocated on the supplied MEM_ROOT.
220
221 @param mem_root The MEM_ROOT to allocate the contents on
222 @param num_buckets Maximum number of buckets to create
223 @param db_name Database name
224 @param tbl_name Table name
225 @param col_name Column name
226
227 @return nullptr on error, or a valid histogram if success.
228 */
229 virtual Histogram *build_histogram(MEM_ROOT *mem_root, size_t num_buckets,
230 const std::string &db_name,
231 const std::string &tbl_name,
232 const std::string &col_name) const = 0;
233
234 /// @return The sampling rate that was used to generate this Value_map.
235 double get_sampling_rate() const { return m_sampling_rate; }
236
237 /**
238 Set the sampling rate that was used to generate this Value_map.
239
240 @param sampling_rate The sampling rate.
241 */
242 void set_sampling_rate(double sampling_rate) {
243 m_sampling_rate = sampling_rate;
244 }
245
246 /// @return the character set for the data this Value_map contains
247 const CHARSET_INFO *get_character_set() const { return m_charset; }
248
249 /// @return the data type that this Value_map contains
251
252 /// @return the overhead in bytes for each distinct value stored in the
253 /// Value_map.
254 virtual size_t element_overhead() const = 0;
255};
256
257/**
258 Value_map class.
259
260 This class works as a map. It is a collection of [key, count], where "count"
261 is the number of occurrences of "key". The class abstracts away things like
262 duplicate checking and the underlying container.
263*/
264template <class T>
265class Value_map final : public Value_map_base {
266 private:
270
272
273 public:
275 : Value_map_base(charset, data_type),
276 m_value_map(typename value_map_type::allocator_type(&m_mem_root)) {}
277
278 size_t size() const override { return m_value_map.size(); }
279
280 typename value_map_type::const_iterator begin() const {
281 return m_value_map.cbegin();
282 }
283
284 typename value_map_type::const_iterator end() const {
285 return m_value_map.cend();
286 }
287
288 bool add_values(const T &value, const ha_rows count);
289
290 /**
291 Insert a range of values into the Value_map.
292
293 Values in the range (begin, end] must be sorted according to
294 Histogram_comparator. Note that this function is currently only used in
295 unit testing.
296
297 @note The value map must be empty before calling this function.
298
299 @param begin Iterator that points to the beginning of the range.
300 @param end Iterator that points to the end of the range.
301
302 @return false on success, true on error (OOM or similar).
303 */
304 bool insert(typename value_map_type::const_iterator begin,
305 typename value_map_type::const_iterator end);
306
307 Histogram *build_histogram(MEM_ROOT *mem_root, size_t num_buckets,
308 const std::string &db_name,
309 const std::string &tbl_name,
310 const std::string &col_name) const override;
311
312 /// @return the overhead in bytes for each distinct value stored in the
313 /// Value_map. The value 32 is obtained from both GCC 8.2 and
314 /// Clang 8.0 (same as sizeof(value_map_type::node_type) in C++17).
315 size_t element_overhead() const override {
316 // TODO: Replace this with sizeof(value_map_type::node_type) when we have
317 // full C++17 support.
318 return sizeof(typename value_map_type::value_type) +
319 sizeof(typename value_map_type::key_type) + 32;
320 }
321};
322
323// Explicit template instantiations.
324template <>
325bool Histogram_comparator::operator()(const String &, const String &) const;
326
327template <>
328bool Histogram_comparator::operator()(const Time_val &, const Time_val &) const;
329
330template <>
332 const Datetime_val &) const;
333
334template <>
336 const my_decimal &) const;
337
338} // namespace histograms
339
340#endif
Definition: my_temporal.h:339
Mem_root_allocator is a C++ STL memory allocator based on MEM_ROOT.
Definition: mem_root_allocator.h:68
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:169
Time_val is a temporal type that represents only time.
Definition: my_temporal.h:55
Histogram base class.
Definition: histogram.h:314
The abstract base class for all Value_map types.
Definition: value_map.h:171
virtual Histogram * build_histogram(MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name) const =0
Create a Histogram from this Value_map.
virtual size_t size() const =0
Returns the number of [value, count] pairs in the Value_map.
const CHARSET_INFO * m_charset
Definition: value_map.h:174
virtual ~Value_map_base()=default
bool add_values(const T &value, const ha_rows count)
Add a value with the given count to this Value_map.
Definition: value_map.cc:93
double get_sampling_rate() const
Definition: value_map.h:235
void add_null_values(const ha_rows count)
Increase the number of null values with the given count.
Definition: value_map.h:210
Value_map_type get_data_type() const
Definition: value_map.h:250
virtual size_t element_overhead() const =0
ha_rows m_num_null_values
Definition: value_map.h:175
MEM_ROOT m_mem_root
Definition: value_map.h:179
double m_sampling_rate
Definition: value_map.h:173
Value_map_base(const CHARSET_INFO *charset, Value_map_type data_type)
Definition: value_map.cc:84
void set_sampling_rate(double sampling_rate)
Set the sampling rate that was used to generate this Value_map.
Definition: value_map.h:242
const CHARSET_INFO * get_character_set() const
Definition: value_map.h:247
ha_rows get_num_null_values() const
Definition: value_map.h:213
const Value_map_type m_data_type
Definition: value_map.h:176
Value_map class.
Definition: value_map.h:265
value_map_type::const_iterator end() const
Definition: value_map.h:284
size_t element_overhead() const override
Definition: value_map.h:315
bool insert(typename value_map_type::const_iterator begin, typename value_map_type::const_iterator end)
Insert a range of values into the Value_map.
Definition: value_map.cc:144
bool add_values(const T &value, const ha_rows count)
Definition: value_map.cc:99
size_t size() const override
Returns the number of [value, count] pairs in the Value_map.
Definition: value_map.h:278
Histogram * build_histogram(MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name) const override
Create a Histogram from this Value_map.
Definition: value_map.cc:158
std::map< T, ha_rows, Histogram_comparator, Mem_root_allocator< std::pair< const T, ha_rows > > > value_map_type
Definition: value_map.h:269
value_map_type m_value_map
Definition: value_map.h:271
value_map_type::const_iterator begin() const
Definition: value_map.h:280
Value_map(const CHARSET_INFO *charset, Value_map_type data_type)
Definition: value_map.h:274
Equi-height bucket.
Definition: equi_height_bucket.h:54
const T & get_upper_inclusive() const
Definition: equi_height_bucket.h:104
const T & get_lower_inclusive() const
Definition: equi_height_bucket.h:99
my_decimal class limits 'decimal_t' type to what we need in MySQL.
Definition: my_decimal.h:97
static MEM_ROOT mem_root
Definition: client_plugin.cc:114
#define T
Definition: jit_executor_value.cc:373
A better implementation of the UNIX ctype(3) library.
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1217
Server classes for temporal handling (DATE, TIME, DATETIME)
static int count
Definition: myisam_ftdump.cc:45
uint16_t value_type
Definition: vt100.h:184
const std::string charset("charset")
Definition: column_statistics.h:34
static const size_t HISTOGRAM_MAX_COMPARE_LENGTH
The maximum number of characters to evaluate when building histograms.
Definition: value_map.h:64
Value_map_type
Datatypes that a Value_map and histogram can hold (including the invalid type).
Definition: value_map_type.h:33
int key_type
Definition: method.h:38
ValueType value(const std::optional< ValueType > &v)
Definition: gtid.h:83
const char * db_name
Definition: rules_table_service.cc:55
std::map< Key, Value, Compare, ut::allocator< std::pair< const Key, Value > > > map
Specialization of map which uses ut_allocator.
Definition: ut0new.h:2898
Definition: m_ctype.h:421
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:83
Definition: completion_hash.h:40
Histogram comparator.
Definition: value_map.h:72
bool operator()(const SingletonBucket< T > &a, const SingletonBucket< T > &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:142
bool operator()(const equi_height::Bucket< T > &a, const T &b) const
Used by std::lower_bound when computing equal-to and less-than selectivity to find the first bucket w...
Definition: value_map.h:94
bool operator()(const T &lhs, const T &rhs) const
Overload operator(), so that we can use this struct as a custom comparator in std classes/functions.
Definition: value_map.h:85
bool operator()(const T &a, const equi_height::Bucket< T > &b) const
Used by std::upper_bound when computing greater-than selectivity in order to find the first bucket wi...
Definition: value_map.h:114
bool operator()(const T &a, const SingletonBucket< T > &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:122
bool operator()(const SingletonBucket< T > &a, const T &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:102
bool operator()(const equi_height::Bucket< T > &a, const equi_height::Bucket< T > &b) const
Used by std::is_sorted to verify that equi-height histogram buckets are stored in sorted order.
Definition: value_map.h:132
Definition: singleton.h:107
T value
Definition: singleton.h:108