MySQL 8.3.0
Source Code Documentation
equi_height.h
Go to the documentation of this file.
1#ifndef HISTOGRAMS_EQUI_HEIGHT_INCLUDED
2#define HISTOGRAMS_EQUI_HEIGHT_INCLUDED
3
4/* Copyright (c) 2016, 2023, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 @file sql/histograms/equi_height.h
28 Equi-height histogram.
29
30 This file defines the class representing an equi-height histogram.
31
32 An equi-height histogram converted to a JSON object, follows the following
33 "schema":
34
35 {
36 // Last time the histogram was updated. As of now, this means "when the
37 // histogram was created" (incremental updates are not supported). Date/time
38 // is given in UTC.
39 // -- J_DATETIME
40 "last-updated": "2015-11-04 15:19:51.000000",
41
42 // Histogram type. Always "equi-height" for equi-height histograms.
43 // -- J_STRING
44 "histogram-type": "equi-height",
45
46 // Fraction of NULL values. This is the total fraction of NULL values in the
47 // original data set.
48 // -- J_DOUBLE
49 "null-values": 0.1,
50
51 // Histogram buckets. May be an empty array, if for instance the source
52 // only contain NULL values.
53 // -- J_ARRAY
54 "buckets":
55 [
56 [
57 // Lower inclusive value.
58 // -- Data type depends on the source column.
59 "0",
60
61 // Upper inclusive value.
62 // -- Data type depends on the source column.
63 "002a38227ecc7f0d952e85ffe37832d3f58910da",
64
65 // Cumulative frequence
66 // -- J_DOUBLE
67 0.001978728666831561,
68
69 // Number of distinct values in this bucket.
70 // -- J_UINT
71 10
72 ]
73 ]
74 }
75*/
76
77#include <cstddef> // size_t
78#include <string> // std::string
79
80#include "sql/histograms/equi_height_bucket.h" // IWYU pragma: keep
81#include "sql/histograms/histogram.h" // Histogram, value_map_type
83#include "sql/mem_root_array.h"
84
85class Json_array;
86class Json_object;
87namespace histograms {
88struct Histogram_comparator;
89} // namespace histograms
90struct MEM_ROOT;
91template <class T>
93
94namespace histograms {
95
96namespace equi_height {
97template <class T>
98class Bucket;
99} // namespace equi_height
100
101template <class T>
102class Equi_height : public Histogram {
103 public:
104 /**
105 Equi-height histogram factory method.
106
107 Attempts to allocate and initialize an equi-height histogram on the supplied
108 mem_root. This will not build the histogram, but only set its properties.
109 If the attempt to allocate the histogram fails or if an error occurs during
110 construction we return nullptr.
111
112 @param mem_root the mem_root where the histogram contents will be allocated
113 @param db_name name of the database this histogram represents
114 @param tbl_name name of the table this histogram represents
115 @param col_name name of the column this histogram represents
116 @param data_type the type of data that this histogram contains
117
118 @return A pointer to an equi-height histogram on success. Returns nullptr on
119 error.
120 */
121 static Equi_height<T> *create(MEM_ROOT *mem_root, const std::string &db_name,
122 const std::string &tbl_name,
123 const std::string &col_name,
124 Value_map_type data_type);
125
126 /**
127 Make a clone of this histogram on a MEM_ROOT.
128
129 @param mem_root the MEM_ROOT to allocate the new histogram contents on.
130
131 @return a copy of the histogram allocated on the provided MEM_ROOT.
132 */
133 Histogram *clone(MEM_ROOT *mem_root) const override;
134
135 Equi_height(const Equi_height<T> &other) = delete;
136
137 /**
138 Build the histogram.
139
140 This function will build a new histogram from a "value map". The function
141 will create at most num_buckets buckets, but may use less.
142
143 @param value_map a value map, where the map key is a value and the
144 map value is the absolute frequency for that value
145 @param num_buckets maximum number of buckets to create
146
147 @return true on error, false otherwise
148 */
149 bool build_histogram(const Value_map<T> &value_map, size_t num_buckets);
150
151 /**
152 Find the fraction of values equal to 'value'.
153
154 This function will estimate the fraction of values that is equal to the
155 provided value.
156
157 @param value The value to estimate the selectivity for.
158
159 @return The selectivity between 0.0 and 1.0 inclusive.
160 */
161 double get_equal_to_selectivity(const T &value) const;
162
163 /**
164 Find the fraction of values that is less than 'value'.
165
166 This function will estimate the fraction of values that is less than the
167 provided value.
168
169 @param value The value to estimate the selectivity for.
170
171 @return the selectivity between 0.0 and 1.0 inclusive.
172 */
173 double get_less_than_selectivity(const T &value) const;
174
175 /**
176 Find the fraction of values that is greater than 'value'.
177
178 This function will estimate the fraction of values that is greater than the
179 provided value.
180
181 @param value The value to estimate the selectivity for.
182
183 @return the selectivity between 0.0 and 1.0 inclusive.
184 */
185 double get_greater_than_selectivity(const T &value) const;
186
187 /**
188 @return number of buckets in this histogram
189 */
190 size_t get_num_buckets() const override { return m_buckets.size(); }
191
192 /**
193 Get the estimated number of distinct non-NULL values.
194 @return number of distinct non-NULL values
195 */
196 size_t get_num_distinct_values() const override;
197
198 /**
199 Convert this histogram to a JSON object.
200
201 This function will take the contents of the current histogram and put
202 it in the output parameter "json_object".
203
204 @param[in,out] json_object output where the histogram is to be stored The
205 caller is responsible for allocating/deallocating the JSON
206 object
207
208 @return true on error, false otherwise
209 */
210 bool histogram_to_json(Json_object *json_object) const override;
211
212 /**
213 Get the buckets of the histogram. Exposed for unit testing.
214
215 @return A const reference to the collection of buckets.
216 */
218 return m_buckets;
219 }
220
221 /**
222 Returns the histogram type as a readable string.
223
224 @return a readable string representation of the histogram type
225 */
226 std::string histogram_type_to_str() const override;
227
228 protected:
229 /**
230 Populate this histogram with contents from a JSON object.
231
232 @param json_object a JSON object that represents an Equi-height histogram
233 @param context error context for validation
234
235 @return True on error, false otherwise.
236 */
237 bool json_to_histogram(const Json_object &json_object,
238 Error_context *context) override;
239
240 private:
241 /// String representation of the histogram type EQUI-HEIGHT.
242 static constexpr const char *equi_height_str() { return "equi-height"; }
243
244 /**
245 Equi-height constructor.
246
247 This will not build the histogram, but only set its properties.
248
249 @param mem_root the mem_root where the histogram contents will be allocated
250 @param db_name name of the database this histogram represents
251 @param tbl_name name of the table this histogram represents
252 @param col_name name of the column this histogram represents
253 @param data_type the type of data that this histogram contains
254 @param[out] error is set to true if an error occurs
255 */
256 Equi_height(MEM_ROOT *mem_root, const std::string &db_name,
257 const std::string &tbl_name, const std::string &col_name,
258 Value_map_type data_type, bool *error);
259
260 /**
261 Equi-height copy-constructor
262
263 This will take a copy of the histogram and all of its contents on the
264 provided MEM_ROOT.
265
266 @param mem_root the MEM_ROOT to allocate the new histogram on.
267 @param other the histogram to take a copy of
268 @param[out] error is set to true if an error occurs
269 */
270 Equi_height(MEM_ROOT *mem_root, const Equi_height<T> &other, bool *error);
271
272 /**
273 Create Equi-height buckets from a JSON array.
274
275 This function will add new buckets to the current histogram by going through
276 the provided JSON array. Contents are allocated as needed on the current
277 histograms MEM_ROOT.
278
279 @param json_bucket a JSON array containing the histogram buckets
280 @param context error context for validation
281
282 @return true on error, false otherwise
283 */
284 bool add_bucket_from_json(const Json_array *json_bucket,
285 Error_context *context);
286
287 /// The buckets for this histogram.
289};
290
291} // namespace histograms
292
293#endif
Represents a JSON array container, i.e.
Definition: json_dom.h:514
Represents a JSON container value of type "object" (ECMA), type J_OBJECT here.
Definition: json_dom.h:367
Mem_root_allocator is a C++ STL memory allocator based on MEM_ROOT.
Definition: mem_root_allocator.h:67
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:425
Definition: equi_height.h:102
double get_less_than_selectivity(const T &value) const
Find the fraction of values that is less than 'value'.
Definition: equi_height.cc:719
Equi_height(const Equi_height< T > &other)=delete
Histogram * clone(MEM_ROOT *mem_root) const override
Make a clone of this histogram on a MEM_ROOT.
Definition: equi_height.cc:656
Mem_root_array< equi_height::Bucket< T > > m_buckets
The buckets for this histogram.
Definition: equi_height.h:288
size_t get_num_buckets() const override
Definition: equi_height.h:190
bool histogram_to_json(Json_object *json_object) const override
Convert this histogram to a JSON object.
Definition: equi_height.cc:467
std::string histogram_type_to_str() const override
Returns the histogram type as a readable string.
Definition: equi_height.cc:495
static Equi_height< T > * create(MEM_ROOT *mem_root, const std::string &db_name, const std::string &tbl_name, const std::string &col_name, Value_map_type data_type)
Equi-height histogram factory method.
Definition: equi_height.cc:65
static constexpr const char * equi_height_str()
String representation of the histogram type EQUI-HEIGHT.
Definition: equi_height.h:242
double get_equal_to_selectivity(const T &value) const
Find the fraction of values equal to 'value'.
Definition: equi_height.cc:681
const Mem_root_array< equi_height::Bucket< T > > & get_buckets() const
Get the buckets of the histogram.
Definition: equi_height.h:217
double get_greater_than_selectivity(const T &value) const
Find the fraction of values that is greater than 'value'.
Definition: equi_height.cc:769
size_t get_num_distinct_values() const override
Get the estimated number of distinct non-NULL values.
Definition: equi_height.cc:672
bool add_bucket_from_json(const Json_array *json_bucket, Error_context *context)
Create Equi-height buckets from a JSON array.
Definition: equi_height.cc:564
bool build_histogram(const Value_map< T > &value_map, size_t num_buckets)
Build the histogram.
Definition: equi_height.cc:348
bool json_to_histogram(const Json_object &json_object, Error_context *context) override
Populate this histogram with contents from a JSON object.
Definition: equi_height.cc:500
Error context to validate given JSON object which represents a histogram.
Definition: histogram.h:207
Histogram base class.
Definition: histogram.h:312
Value_map class.
Definition: value_map.h:263
Equi-height bucket.
Definition: equi_height_bucket.h:53
static MEM_ROOT mem_root
Definition: client_plugin.cc:113
Equi-height bucket.
Histogram base class.
Definition: column_statistics.h:33
Value_map_type
Datatypes that a Value_map and histogram can hold (including the invalid type).
Definition: value_map_type.h:32
const char * db_name
Definition: rules_table_service.cc:54
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:82