MySQL 8.3.0
Source Code Documentation
value_map.h
Go to the documentation of this file.
1#ifndef HISTOGRAMS_VALUE_MAP_INCLUDED
2#define HISTOGRAMS_VALUE_MAP_INCLUDED
3
4/* Copyright (c) 2017, 2023, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 @file sql/histograms/value_map.h
28*/
29
30#include <stddef.h>
31#include <functional> // std::less
32#include <map>
33#include <string>
34#include <utility> // std::pair
35
36#include "my_alloc.h"
37#include "my_base.h" // ha_rows
39#include "mysql_time.h"
41
42class String;
43class my_decimal;
44template <class T>
46
47namespace histograms {
48
49class Histogram;
50template <class T>
51struct SingletonBucket;
52
53namespace equi_height {
54template <class T>
55class Bucket;
56} // namespace equi_height
57
58/**
59 The maximum number of characters to evaluate when building histograms. For
60 binary/blob values, this is the number of bytes to consider.
61*/
62static const size_t HISTOGRAM_MAX_COMPARE_LENGTH = 42;
63
64/**
65 Histogram comparator.
66
67 Typical usage is in a "value map", where we for instance need to sort based
68 on string collation and similar.
69*/
71 public:
72 /**
73 Overload operator(), so that we can use this struct as a custom comparator
74 in std classes/functions.
75
76 @param lhs first value to compare
77 @param rhs second value to compare
78
79 @return true if lhs is considered to be smaller/less than rhs. false
80 otherwise.
81 */
82 template <class T>
83 bool operator()(const T &lhs, const T &rhs) const {
84 return std::less<T>()(lhs, rhs);
85 }
86
87 /**
88 Used by std::lower_bound when computing equal-to and less-than selectivity
89 to find the first bucket with an upper bound that is not less than b.
90 */
91 template <class T>
92 bool operator()(const equi_height::Bucket<T> &a, const T &b) const {
94 }
95
96 /**
97 * Same as above, but for singleton histogram buckets.
98 */
99 template <class T>
100 bool operator()(const SingletonBucket<T> &a, const T &b) const {
101 return Histogram_comparator()(a.value, b);
102 }
103
104 /**
105 Used by std::upper_bound when computing greater-than selectivity in order to
106 find the first bucket with an upper bound that is greater than a.
107 Notice that the comparison function used by std::lower_bound and
108 std::upper_bound have the collection element as the first and second
109 argument, respectively.
110 */
111 template <class T>
112 bool operator()(const T &a, const equi_height::Bucket<T> &b) const {
114 }
115
116 /**
117 * Same as above, but for singleton histogram buckets.
118 */
119 template <class T>
120 bool operator()(const T &a, const SingletonBucket<T> &b) const {
121 return Histogram_comparator()(a, b.value);
122 }
123
124 /**
125 Used by std::is_sorted to verify that equi-height histogram buckets are
126 stored in sorted order. We consider bucket a = [a1, a2] to be less than
127 bucket b = [b1, b2] if a2 < b1.
128 */
129 template <class T>
131 const equi_height::Bucket<T> &b) const {
134 }
135
136 /**
137 * Same as above, but for singleton histogram buckets.
138 */
139 template <class T>
141 const SingletonBucket<T> &b) const {
142 return Histogram_comparator()(a.value, b.value);
143 }
144};
145
146/**
147 The abstract base class for all Value_map types.
148
149 We would ideally like to have only one class for the Value map concept (no
150 inheritance) which would gives us an easier interface. But there are some
151 reasons for why we need to to split the class into a non-templated base class
152 and a templated subclass:
153
154 - We are collecting Value_maps in a collection/vector where they have a
155 different template type. This cannot be achieved unless we have a
156 non-templated base class.
157
158 - When working on a collection of Value_maps, it is more convenient to declare
159 the interface in the base class (Value_map_base) so that we don't need to
160 do a cast to the subclass in order to get hold of the methods we want to
161 use.
162
163 - Value_map_base::add_values and Value_map::add_values looks like the same
164 function, but they are not. Value_map_base::add_values is a small functions
165 that helps us cast the Value_map<T> to the correct type (for instance
166 Value_map<longlong>). Ideally, this function would have been pure virtual,
167 but it's not possible to have virtual member function templates.
168*/
170 private:
175
176 protected:
178
179 public:
181
182 virtual ~Value_map_base() = default;
183
184 /**
185 Returns the number of [value, count] pairs in the Value_map.
186
187 @return The number of values in the Value_map.
188 */
189 virtual size_t size() const = 0;
190
191 /**
192 Add a value with the given count to this Value_map. If the given value
193 already exists, the count will be added to the existing count.
194
195 @param value The value to add.
196 @param count Count of the value to add.
197
198 @return false on success, and true in case of errors (OOM).
199 */
200 template <class T>
201 bool add_values(const T &value, const ha_rows count);
202
203 /**
204 Increase the number of null values with the given count.
205
206 @param count The number of null values.
207 */
209
210 /// @return The number of null values in this Value_map.
212
213 /**
214 Create a Histogram from this Value_map.
215
216 The resulting histogram will have at most "num_buckets" buckets (might be
217 less), and all of its contents will be allocated on the supplied MEM_ROOT.
218
219 @param mem_root The MEM_ROOT to allocate the contents on
220 @param num_buckets Maximum number of buckets to create
221 @param db_name Database name
222 @param tbl_name Table name
223 @param col_name Column name
224
225 @return nullptr on error, or a valid histogram if success.
226 */
227 virtual Histogram *build_histogram(MEM_ROOT *mem_root, size_t num_buckets,
228 const std::string &db_name,
229 const std::string &tbl_name,
230 const std::string &col_name) const = 0;
231
232 /// @return The sampling rate that was used to generate this Value_map.
233 double get_sampling_rate() const { return m_sampling_rate; }
234
235 /**
236 Set the sampling rate that was used to generate this Value_map.
237
238 @param sampling_rate The sampling rate.
239 */
240 void set_sampling_rate(double sampling_rate) {
241 m_sampling_rate = sampling_rate;
242 }
243
244 /// @return the character set for the data this Value_map contains
245 const CHARSET_INFO *get_character_set() const { return m_charset; }
246
247 /// @return the data type that this Value_map contains
249
250 /// @return the overhead in bytes for each distinct value stored in the
251 /// Value_map.
252 virtual size_t element_overhead() const = 0;
253};
254
255/**
256 Value_map class.
257
258 This class works as a map. It is a collection of [key, count], where "count"
259 is the number of occurrences of "key". The class abstracts away things like
260 duplicate checking and the underlying container.
261*/
262template <class T>
263class Value_map final : public Value_map_base {
264 private:
268
270
271 public:
273 : Value_map_base(charset, data_type),
274 m_value_map(typename value_map_type::allocator_type(&m_mem_root)) {}
275
276 size_t size() const override { return m_value_map.size(); }
277
278 typename value_map_type::const_iterator begin() const {
279 return m_value_map.cbegin();
280 }
281
282 typename value_map_type::const_iterator end() const {
283 return m_value_map.cend();
284 }
285
286 bool add_values(const T &value, const ha_rows count);
287
288 /**
289 Insert a range of values into the Value_map.
290
291 Values in the range (begin, end] must be sorted according to
292 Histogram_comparator. Note that this function is currently only used in
293 unit testing.
294
295 @note The value map must be empty before calling this function.
296
297 @param begin Iterator that points to the beginning of the range.
298 @param end Iterator that points to the end of the range.
299
300 @return false on success, true on error (OOM or similar).
301 */
302 bool insert(typename value_map_type::const_iterator begin,
303 typename value_map_type::const_iterator end);
304
305 Histogram *build_histogram(MEM_ROOT *mem_root, size_t num_buckets,
306 const std::string &db_name,
307 const std::string &tbl_name,
308 const std::string &col_name) const override;
309
310 /// @return the overhead in bytes for each distinct value stored in the
311 /// Value_map. The value 32 is obtained from both GCC 8.2 and
312 /// Clang 8.0 (same as sizeof(value_map_type::node_type) in C++17).
313 size_t element_overhead() const override {
314 // TODO: Replace this with sizeof(value_map_type::node_type) when we have
315 // full C++17 support.
316 return sizeof(typename value_map_type::value_type) +
317 sizeof(typename value_map_type::key_type) + 32;
318 }
319};
320
321// Explicit template instantiations.
322template <>
323bool Histogram_comparator::operator()(const String &, const String &) const;
324
325template <>
327 const MYSQL_TIME &) const;
328
329template <>
331 const my_decimal &) const;
332
333} // namespace histograms
334
335#endif
Mem_root_allocator is a C++ STL memory allocator based on MEM_ROOT.
Definition: mem_root_allocator.h:67
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:166
Histogram base class.
Definition: histogram.h:312
The abstract base class for all Value_map types.
Definition: value_map.h:169
virtual Histogram * build_histogram(MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name) const =0
Create a Histogram from this Value_map.
virtual size_t size() const =0
Returns the number of [value, count] pairs in the Value_map.
const CHARSET_INFO * m_charset
Definition: value_map.h:172
virtual ~Value_map_base()=default
bool add_values(const T &value, const ha_rows count)
Add a value with the given count to this Value_map.
Definition: value_map.cc:86
double get_sampling_rate() const
Definition: value_map.h:233
void add_null_values(const ha_rows count)
Increase the number of null values with the given count.
Definition: value_map.h:208
Value_map_type get_data_type() const
Definition: value_map.h:248
virtual size_t element_overhead() const =0
ha_rows m_num_null_values
Definition: value_map.h:173
MEM_ROOT m_mem_root
Definition: value_map.h:177
double m_sampling_rate
Definition: value_map.h:171
Value_map_base(const CHARSET_INFO *charset, Value_map_type data_type)
Definition: value_map.cc:77
void set_sampling_rate(double sampling_rate)
Set the sampling rate that was used to generate this Value_map.
Definition: value_map.h:240
const CHARSET_INFO * get_character_set() const
Definition: value_map.h:245
ha_rows get_num_null_values() const
Definition: value_map.h:211
const Value_map_type m_data_type
Definition: value_map.h:174
Value_map class.
Definition: value_map.h:263
value_map_type::const_iterator end() const
Definition: value_map.h:282
size_t element_overhead() const override
Definition: value_map.h:313
bool insert(typename value_map_type::const_iterator begin, typename value_map_type::const_iterator end)
Insert a range of values into the Value_map.
Definition: value_map.cc:136
bool add_values(const T &value, const ha_rows count)
Definition: value_map.cc:92
size_t size() const override
Returns the number of [value, count] pairs in the Value_map.
Definition: value_map.h:276
Histogram * build_histogram(MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name) const override
Create a Histogram from this Value_map.
Definition: value_map.cc:150
std::map< T, ha_rows, Histogram_comparator, Mem_root_allocator< std::pair< const T, ha_rows > > > value_map_type
Definition: value_map.h:267
value_map_type m_value_map
Definition: value_map.h:269
value_map_type::const_iterator begin() const
Definition: value_map.h:278
Value_map(const CHARSET_INFO *charset, Value_map_type data_type)
Definition: value_map.h:272
Equi-height bucket.
Definition: equi_height_bucket.h:53
const T & get_upper_inclusive() const
Definition: equi_height_bucket.h:103
const T & get_lower_inclusive() const
Definition: equi_height_bucket.h:98
my_decimal class limits 'decimal_t' type to what we need in MySQL.
Definition: my_decimal.h:94
static MEM_ROOT mem_root
Definition: client_plugin.cc:113
A better implementation of the UNIX ctype(3) library.
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1140
static int count
Definition: myisam_ftdump.cc:44
Time declarations shared between the server and client API: you should not add anything to this heade...
int key_type
Definition: http_request.h:49
uint16_t value_type
Definition: vt100.h:183
const std::string charset("charset")
Definition: column_statistics.h:33
static const size_t HISTOGRAM_MAX_COMPARE_LENGTH
The maximum number of characters to evaluate when building histograms.
Definition: value_map.h:62
Value_map_type
Datatypes that a Value_map and histogram can hold (including the invalid type).
Definition: value_map_type.h:32
const char * db_name
Definition: rules_table_service.cc:54
std::map< Key, Value, Compare, ut::allocator< std::pair< const Key, Value > > > map
Specialization of map which uses ut_allocator.
Definition: ut0new.h:2891
Definition: m_ctype.h:422
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:82
Definition: mysql_time.h:81
Definition: completion_hash.h:39
Histogram comparator.
Definition: value_map.h:70
bool operator()(const SingletonBucket< T > &a, const SingletonBucket< T > &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:140
bool operator()(const equi_height::Bucket< T > &a, const T &b) const
Used by std::lower_bound when computing equal-to and less-than selectivity to find the first bucket w...
Definition: value_map.h:92
bool operator()(const T &lhs, const T &rhs) const
Overload operator(), so that we can use this struct as a custom comparator in std classes/functions.
Definition: value_map.h:83
bool operator()(const T &a, const equi_height::Bucket< T > &b) const
Used by std::upper_bound when computing greater-than selectivity in order to find the first bucket wi...
Definition: value_map.h:112
bool operator()(const T &a, const SingletonBucket< T > &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:120
bool operator()(const SingletonBucket< T > &a, const T &b) const
Same as above, but for singleton histogram buckets.
Definition: value_map.h:100
bool operator()(const equi_height::Bucket< T > &a, const equi_height::Bucket< T > &b) const
Used by std::is_sorted to verify that equi-height histogram buckets are stored in sorted order.
Definition: value_map.h:130
Definition: singleton.h:106
T value
Definition: singleton.h:107