MySQL  8.0.19
Source Code Documentation
value_map.h
Go to the documentation of this file.
1 #ifndef HISTOGRAMS_VALUE_MAP_INCLUDED
2 #define HISTOGRAMS_VALUE_MAP_INCLUDED
3 
4 /* Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License, version 2.0,
8  as published by the Free Software Foundation.
9 
10  This program is also distributed with certain software (including
11  but not limited to OpenSSL) that is licensed under separate terms,
12  as designated in a particular file or component or in included license
13  documentation. The authors of MySQL hereby grant you an additional
14  permission to link the program and your derivative works with the
15  separately licensed software that they have included with MySQL.
16 
17  This program is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License, version 2.0, for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25 
26 /**
27  @file sql/histograms/value_map.h
28 */
29 
30 #include <stddef.h>
31 #include <functional> // std::less
32 #include <map>
33 #include <string>
34 #include <utility> // std::pair
35 
36 #include "m_ctype.h"
37 #include "my_alloc.h"
38 #include "my_base.h" // ha_rows
39 #include "mysql_time.h"
41 
42 class String;
43 class my_decimal;
44 template <class T>
45 class Memroot_allocator;
46 
47 namespace histograms {
48 
49 class Histogram;
50 namespace equi_height {
51 template <class T>
52 class Bucket;
53 } // namespace equi_height
54 
55 /**
56  The maximum number of characters to evaluate when building histograms. For
57  binary/blob values, this is the number of bytes to consider.
58 */
59 static const size_t HISTOGRAM_MAX_COMPARE_LENGTH = 42;
60 
61 /**
62  Histogram comparator.
63 
64  Typical usage is in a "value map", where we for instance need to sort based
65  on string collation and similar.
66 */
68  public:
69  /**
70  Overload operator(), so that we can use this struct as a custom comparator
71  in std classes/functions.
72 
73  @param lhs first value to compare
74  @param rhs second value to compare
75 
76  @return true if lhs is considered to be smaller/less than rhs. false
77  otherwise.
78  */
79  template <class T>
80  bool operator()(const T &lhs, const T &rhs) const {
81  return std::less<T>()(lhs, rhs);
82  }
83 
84  template <class T>
85  bool operator()(const equi_height::Bucket<T> &a, const T &b) const {
87  }
88 
89  template <class T>
91  const equi_height::Bucket<T> &b) const {
94  }
95 };
96 
97 /**
98  The abstract base class for all Value_map types.
99 
100  We would ideally like to have only one class for the Value map concept (no
101  inheritance) which would gives us an easier interface. But there are some
102  reasons for why we need to to split the class into a non-templated base class
103  and a templated subclass:
104 
105  - We are collecting Value_maps in a collection/vector where they have a
106  different template type. This cannot be achieved unless we have a
107  non-templated base class.
108 
109  - When working on a collection of Value_maps, it is more convenient to declare
110  the interface in the base class (Value_map_base) so that we don't need to
111  do a cast to the subclass in order to get hold of the methods we want to
112  use.
113 
114  - Value_map_base::add_values and Value_map::add_values looks like the same
115  function, but they are not. Value_map_base::add_values is a small functions
116  that helps us cast the Value_map<T> to the correct type (for instance
117  Value_map<longlong>). Ideally, this function would have been pure virtual,
118  but it's not possible to have virtual member function templates.
119 */
121  private:
126 
127  protected:
129 
130  public:
131  Value_map_base(const CHARSET_INFO *charset, double sampling_rate,
132  Value_map_type data_type);
133 
134  virtual ~Value_map_base() {}
135 
136  /**
137  Returns the number of [value, count] pairs in the Value_map.
138 
139  @return The number of values in the Value_map.
140  */
141  virtual size_t size() const = 0;
142 
143  /**
144  Add a value with the given count to this Value_map. If the given value
145  already exists, the count will be added to the existing count.
146 
147  @param value The value to add.
148  @param count Count of the value to add.
149 
150  @return false on success, and true in case of errors (OOM).
151  */
152  template <class T>
153  bool add_values(const T &value, const ha_rows count);
154 
155  /**
156  Increase the number of null values with the given count.
157 
158  @param count The number of null values.
159  */
161 
162  /// @return The number of null values in this Value_map.
164 
165  /**
166  Create a Histogram from this Value_map.
167 
168  The resulting histogram will have at most "num_buckets" buckets (might be
169  less), and all of its contents will be allocated on the supplied MEM_ROOT.
170 
171  @param mem_root The MEM_ROOT to allocate the contents on
172  @param num_buckets Maximum number of buckets to create
173  @param db_name Database name
174  @param tbl_name Table name
175  @param col_name Column name
176 
177  @return nullptr on error, or a valid histogram if success.
178  */
179  virtual Histogram *build_histogram(MEM_ROOT *mem_root, size_t num_buckets,
180  const std::string &db_name,
181  const std::string &tbl_name,
182  const std::string &col_name) const = 0;
183 
184  /// @return The sampling rate that was used to generate this Value_map.
185  double get_sampling_rate() const { return m_sampling_rate; }
186 
187  /**
188  Set the sampling rate that was used to generate this Value_map.
189 
190  @param sampling_rate The sampling rate.
191  */
192  void set_sampling_rate(double sampling_rate) {
193  m_sampling_rate = sampling_rate;
194  }
195 
196  /// @return the character set for the data this Value_map contains
197  const CHARSET_INFO *get_character_set() const { return m_charset; }
198 
199  /// @return the data type that this Value_map contains
201 
202  /// @return the overhead in bytes for each distinct value stored in the
203  /// Value_map.
204  virtual size_t element_overhead() const = 0;
205 };
206 
207 /**
208  Value_map class.
209 
210  This class works as a map. It is a collection of [key, count], where "count"
211  is the number of occurances of "key". The class abstracts away things like
212  duplicate checking and the underlying container.
213 */
214 template <class T>
215 class Value_map final : public Value_map_base {
216  private:
217  using value_map_type =
218  std::map<T, ha_rows, Histogram_comparator,
220 
222 
223  public:
224  Value_map(const CHARSET_INFO *charset, Value_map_type data_type,
225  double sampling_rate = 0.0)
226  : Value_map_base(charset, sampling_rate, data_type),
227  m_value_map(typename value_map_type::allocator_type(&m_mem_root)) {}
228 
229  size_t size() const override { return m_value_map.size(); }
230 
231  typename value_map_type::const_iterator begin() const {
232  return m_value_map.cbegin();
233  }
234 
235  typename value_map_type::const_iterator end() const {
236  return m_value_map.cend();
237  }
238 
239  bool add_values(const T &value, const ha_rows count);
240 
241  /**
242  Insert a range of values into the Value_map.
243 
244  Values in the range (begin, end] must be sorted according to
245  Histogram_comparator. Note that this function is currently only used in
246  unit testing.
247 
248  @note The value map must be empty before calling this function.
249 
250  @param begin Iterator that points to the beginning of the range.
251  @param end Iterator that points to the end of the range.
252 
253  @return false on success, true on error (OOM or similar).
254  */
255  bool insert(typename value_map_type::const_iterator begin,
256  typename value_map_type::const_iterator end);
257 
258  virtual Histogram *build_histogram(
259  MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name,
260  const std::string &tbl_name, const std::string &col_name) const override;
261 
262  /// @return the overhead in bytes for each distinct value stored in the
263  /// Value_map. The value 32 is obtained from both GCC 8.2 and
264  /// Clang 8.0 (same as sizeof(value_map_type::node_type) in C++17).
265  size_t element_overhead() const override {
266  // TODO: Replace this with sizeof(value_map_type::node_type) when we have
267  // full C++17 support.
268  return sizeof(typename value_map_type::value_type) +
269  sizeof(typename value_map_type::key_type) + 32;
270  }
271 };
272 
273 // Explicit template instantiations.
274 template <>
275 bool Histogram_comparator::operator()(const String &, const String &) const;
276 
277 template <>
279  const MYSQL_TIME &) const;
280 
281 template <>
283  const my_decimal &) const;
284 
285 } // namespace histograms
286 
287 #endif
histograms::Value_map::build_histogram
virtual Histogram * build_histogram(MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name) const override
Definition: value_map.cc:149
histograms::equi_height::Bucket::get_lower_inclusive
const T & get_lower_inclusive() const
Definition: equi_height_bucket.h:98
histograms::Value_map_base::Value_map_base
Value_map_base(const CHARSET_INFO *charset, double sampling_rate, Value_map_type data_type)
Definition: value_map.cc:75
histograms::Value_map_base::m_charset
const CHARSET_INFO * m_charset
Definition: value_map.h:123
my_base.h
histograms::Value_map::size
size_t size() const override
Definition: value_map.h:229
histograms::Value_map_base::get_sampling_rate
double get_sampling_rate() const
Definition: value_map.h:185
histograms::Histogram
Histogram base class.
Definition: histogram.h:136
histograms::Value_map::m_value_map
value_map_type m_value_map
Definition: value_map.h:221
CHARSET_INFO
Definition: m_ctype.h:354
value_map_type.h
histograms::Histogram_comparator::operator()
bool operator()(const T &lhs, const T &rhs) const
Overload operator(), so that we can use this struct as a custom comparator in std classes/functions.
Definition: value_map.h:80
bucket
Definition: completion_hash.h:39
histograms::Value_map::element_overhead
size_t element_overhead() const override
Definition: value_map.h:265
String
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:164
histograms::Value_map_base::add_values
bool add_values(const T &value, const ha_rows count)
Add a value with the given count to this Value_map.
Definition: value_map.cc:85
histograms::Value_map::end
value_map_type::const_iterator end() const
Definition: value_map.h:235
mem_root
static MEM_ROOT mem_root
Definition: client_plugin.cc:109
m_mem_root
MEM_ROOT * m_mem_root
Mem root.
Definition: acl_table_user.cc:209
value
const string value("\"Value\"")
histograms::Value_map_base::m_mem_root
MEM_ROOT m_mem_root
Definition: value_map.h:128
histograms::Histogram_comparator::operator()
bool operator()(const equi_height::Bucket< T > &a, const equi_height::Bucket< T > &b) const
Definition: value_map.h:90
histograms::Value_map_base
The abstract base class for all Value_map types.
Definition: value_map.h:120
histograms::Value_map_base::size
virtual size_t size() const =0
Returns the number of [value, count] pairs in the Value_map.
my_alloc.h
histograms::Value_map::insert
bool insert(typename value_map_type::const_iterator begin, typename value_map_type::const_iterator end)
Insert a range of values into the Value_map.
Definition: value_map.cc:135
histograms::Value_map::value_map_type
std::map< T, ha_rows, Histogram_comparator, Memroot_allocator< std::pair< const T, ha_rows > >> value_map_type
Definition: value_map.h:219
histograms::Value_map_base::set_sampling_rate
void set_sampling_rate(double sampling_rate)
Set the sampling rate that was used to generate this Value_map.
Definition: value_map.h:192
histograms::Value_map_base::get_num_null_values
ha_rows get_num_null_values() const
Definition: value_map.h:163
histograms::HISTOGRAM_MAX_COMPARE_LENGTH
static const size_t HISTOGRAM_MAX_COMPARE_LENGTH
The maximum number of characters to evaluate when building histograms.
Definition: value_map.h:59
m_ctype.h
MEM_ROOT
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:77
histograms::equi_height::Bucket::get_upper_inclusive
const T & get_upper_inclusive() const
Definition: equi_height_bucket.h:103
histograms::Value_map_base::element_overhead
virtual size_t element_overhead() const =0
histograms::Value_map_base::build_histogram
virtual Histogram * build_histogram(MEM_ROOT *mem_root, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name) const =0
Create a Histogram from this Value_map.
histograms::Value_map_base::add_null_values
void add_null_values(const ha_rows count)
Increase the number of null values with the given count.
Definition: value_map.h:160
histograms::Histogram_comparator::operator()
bool operator()(const equi_height::Bucket< T > &a, const T &b) const
Definition: value_map.h:85
Vt100::value_type
uint16_t value_type
Definition: vt100.h:182
histograms::Value_map_base::get_character_set
const CHARSET_INFO * get_character_set() const
Definition: value_map.h:197
histograms::Value_map_base::m_num_null_values
ha_rows m_num_null_values
Definition: value_map.h:124
my_decimal
my_decimal class limits 'decimal_t' type to what we need in MySQL.
Definition: my_decimal.h:91
histograms::Value_map::begin
value_map_type::const_iterator begin() const
Definition: value_map.h:231
histograms::Histogram_comparator
Histogram comparator.
Definition: value_map.h:67
histograms::Value_map_base::m_data_type
const Value_map_type m_data_type
Definition: value_map.h:125
histograms::Value_map::add_values
bool add_values(const T &value, const ha_rows count)
Definition: value_map.cc:91
histograms
Definition: column_statistics.h:33
histograms::Value_map::Value_map
Value_map(const CHARSET_INFO *charset, Value_map_type data_type, double sampling_rate=0.0)
Definition: value_map.h:224
Memroot_allocator
Memroot_allocator is a C++ STL memory allocator based on MEM_ROOT.
Definition: equi_height.h:92
MYSQL_TIME
Definition: mysql_time.h:81
histograms::Value_map_base::m_sampling_rate
double m_sampling_rate
Definition: value_map.h:122
key_type
static int key_type
Definition: mi_test1.cc:38
histograms::Value_map_type
Value_map_type
Datatypes that a Value_map and histogram can hold (including the invalid type).
Definition: value_map_type.h:32
rules_table_service::db_name
const char * db_name
Definition: rules_table_service.cc:54
count
ssize_t count
Definition: memcached.c:386
histograms::equi_height::Bucket
Equi-height bucket.
Definition: equi_height.h:98
ha_rows
my_off_t ha_rows
Definition: my_base.h:1132
final
#define final(a, b, c)
Definition: hash.c:109
histograms::Value_map_base::~Value_map_base
virtual ~Value_map_base()
Definition: value_map.h:134
histograms::Value_map_base::get_data_type
Value_map_type get_data_type() const
Definition: value_map.h:200
mysql_time.h