MySQL 8.4.0
Source Code Documentation
equi_height.h
Go to the documentation of this file.
1#ifndef HISTOGRAMS_EQUI_HEIGHT_INCLUDED
2#define HISTOGRAMS_EQUI_HEIGHT_INCLUDED
3
4/* Copyright (c) 2016, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file sql/histograms/equi_height.h
29 Equi-height histogram.
30
31 This file defines the class representing an equi-height histogram.
32
33 An equi-height histogram converted to a JSON object, follows the following
34 "schema":
35
36 {
37 // Last time the histogram was updated. As of now, this means "when the
38 // histogram was created" (incremental updates are not supported). Date/time
39 // is given in UTC.
40 // -- J_DATETIME
41 "last-updated": "2015-11-04 15:19:51.000000",
42
43 // Histogram type. Always "equi-height" for equi-height histograms.
44 // -- J_STRING
45 "histogram-type": "equi-height",
46
47 // Fraction of NULL values. This is the total fraction of NULL values in the
48 // original data set.
49 // -- J_DOUBLE
50 "null-values": 0.1,
51
52 // Histogram buckets. May be an empty array, if for instance the source
53 // only contain NULL values.
54 // -- J_ARRAY
55 "buckets":
56 [
57 [
58 // Lower inclusive value.
59 // -- Data type depends on the source column.
60 "0",
61
62 // Upper inclusive value.
63 // -- Data type depends on the source column.
64 "002a38227ecc7f0d952e85ffe37832d3f58910da",
65
66 // Cumulative frequence
67 // -- J_DOUBLE
68 0.001978728666831561,
69
70 // Number of distinct values in this bucket.
71 // -- J_UINT
72 10
73 ]
74 ]
75 }
76*/
77
78#include <cstddef> // size_t
79#include <string> // std::string
80
81#include "sql/histograms/equi_height_bucket.h" // IWYU pragma: keep
82#include "sql/histograms/histogram.h" // Histogram, value_map_type
84#include "sql/mem_root_array.h"
85
86class Json_array;
87class Json_object;
88namespace histograms {
89struct Histogram_comparator;
90} // namespace histograms
91struct MEM_ROOT;
92template <class T>
94
95namespace histograms {
96
97namespace equi_height {
98template <class T>
99class Bucket;
100} // namespace equi_height
101
102template <class T>
103class Equi_height : public Histogram {
104 public:
105 /**
106 Equi-height histogram factory method.
107
108 Attempts to allocate and initialize an equi-height histogram on the supplied
109 mem_root. This will not build the histogram, but only set its properties.
110 If the attempt to allocate the histogram fails or if an error occurs during
111 construction we return nullptr.
112
113 @param mem_root the mem_root where the histogram contents will be allocated
114 @param db_name name of the database this histogram represents
115 @param tbl_name name of the table this histogram represents
116 @param col_name name of the column this histogram represents
117 @param data_type the type of data that this histogram contains
118
119 @return A pointer to an equi-height histogram on success. Returns nullptr on
120 error.
121 */
122 static Equi_height<T> *create(MEM_ROOT *mem_root, const std::string &db_name,
123 const std::string &tbl_name,
124 const std::string &col_name,
125 Value_map_type data_type);
126
127 /**
128 Make a clone of this histogram on a MEM_ROOT.
129
130 @param mem_root the MEM_ROOT to allocate the new histogram contents on.
131
132 @return a copy of the histogram allocated on the provided MEM_ROOT.
133 */
134 Histogram *clone(MEM_ROOT *mem_root) const override;
135
136 Equi_height(const Equi_height<T> &other) = delete;
137
138 /**
139 Build the histogram.
140
141 This function will build a new histogram from a "value map". The function
142 will create at most num_buckets buckets, but may use less.
143
144 @param value_map a value map, where the map key is a value and the
145 map value is the absolute frequency for that value
146 @param num_buckets maximum number of buckets to create
147
148 @return true on error, false otherwise
149 */
150 bool build_histogram(const Value_map<T> &value_map, size_t num_buckets);
151
152 /**
153 Find the fraction of values equal to 'value'.
154
155 This function will estimate the fraction of values that is equal to the
156 provided value.
157
158 @param value The value to estimate the selectivity for.
159
160 @return The selectivity between 0.0 and 1.0 inclusive.
161 */
162 double get_equal_to_selectivity(const T &value) const;
163
164 /**
165 Find the fraction of values that is less than 'value'.
166
167 This function will estimate the fraction of values that is less than the
168 provided value.
169
170 @param value The value to estimate the selectivity for.
171
172 @return the selectivity between 0.0 and 1.0 inclusive.
173 */
174 double get_less_than_selectivity(const T &value) const;
175
176 /**
177 Find the fraction of values that is greater than 'value'.
178
179 This function will estimate the fraction of values that is greater than the
180 provided value.
181
182 @param value The value to estimate the selectivity for.
183
184 @return the selectivity between 0.0 and 1.0 inclusive.
185 */
186 double get_greater_than_selectivity(const T &value) const;
187
188 /**
189 @return number of buckets in this histogram
190 */
191 size_t get_num_buckets() const override { return m_buckets.size(); }
192
193 /**
194 Get the estimated number of distinct non-NULL values.
195 @return number of distinct non-NULL values
196 */
197 size_t get_num_distinct_values() const override;
198
199 /**
200 Convert this histogram to a JSON object.
201
202 This function will take the contents of the current histogram and put
203 it in the output parameter "json_object".
204
205 @param[in,out] json_object output where the histogram is to be stored The
206 caller is responsible for allocating/deallocating the JSON
207 object
208
209 @return true on error, false otherwise
210 */
211 bool histogram_to_json(Json_object *json_object) const override;
212
213 /**
214 Get the buckets of the histogram. Exposed for unit testing.
215
216 @return A const reference to the collection of buckets.
217 */
219 return m_buckets;
220 }
221
222 /**
223 Returns the histogram type as a readable string.
224
225 @return a readable string representation of the histogram type
226 */
227 std::string histogram_type_to_str() const override;
228
229 protected:
230 /**
231 Populate this histogram with contents from a JSON object.
232
233 @param json_object a JSON object that represents an Equi-height histogram
234 @param context error context for validation
235
236 @return True on error, false otherwise.
237 */
238 bool json_to_histogram(const Json_object &json_object,
239 Error_context *context) override;
240
241 private:
242 /// String representation of the histogram type EQUI-HEIGHT.
243 static constexpr const char *equi_height_str() { return "equi-height"; }
244
245 /**
246 Equi-height constructor.
247
248 This will not build the histogram, but only set its properties.
249
250 @param mem_root the mem_root where the histogram contents will be allocated
251 @param db_name name of the database this histogram represents
252 @param tbl_name name of the table this histogram represents
253 @param col_name name of the column this histogram represents
254 @param data_type the type of data that this histogram contains
255 @param[out] error is set to true if an error occurs
256 */
257 Equi_height(MEM_ROOT *mem_root, const std::string &db_name,
258 const std::string &tbl_name, const std::string &col_name,
259 Value_map_type data_type, bool *error);
260
261 /**
262 Equi-height copy-constructor
263
264 This will take a copy of the histogram and all of its contents on the
265 provided MEM_ROOT.
266
267 @param mem_root the MEM_ROOT to allocate the new histogram on.
268 @param other the histogram to take a copy of
269 @param[out] error is set to true if an error occurs
270 */
271 Equi_height(MEM_ROOT *mem_root, const Equi_height<T> &other, bool *error);
272
273 /**
274 Create Equi-height buckets from a JSON array.
275
276 This function will add new buckets to the current histogram by going through
277 the provided JSON array. Contents are allocated as needed on the current
278 histograms MEM_ROOT.
279
280 @param json_bucket a JSON array containing the histogram buckets
281 @param context error context for validation
282
283 @return true on error, false otherwise
284 */
285 bool add_bucket_from_json(const Json_array *json_bucket,
286 Error_context *context);
287
288 /// The buckets for this histogram.
290};
291
292} // namespace histograms
293
294#endif
Represents a JSON array container, i.e.
Definition: json_dom.h:516
Represents a JSON container value of type "object" (ECMA), type J_OBJECT here.
Definition: json_dom.h:369
Mem_root_allocator is a C++ STL memory allocator based on MEM_ROOT.
Definition: mem_root_allocator.h:68
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:426
Definition: equi_height.h:103
double get_less_than_selectivity(const T &value) const
Find the fraction of values that is less than 'value'.
Definition: equi_height.cc:720
Equi_height(const Equi_height< T > &other)=delete
Histogram * clone(MEM_ROOT *mem_root) const override
Make a clone of this histogram on a MEM_ROOT.
Definition: equi_height.cc:657
Mem_root_array< equi_height::Bucket< T > > m_buckets
The buckets for this histogram.
Definition: equi_height.h:289
size_t get_num_buckets() const override
Definition: equi_height.h:191
bool histogram_to_json(Json_object *json_object) const override
Convert this histogram to a JSON object.
Definition: equi_height.cc:468
std::string histogram_type_to_str() const override
Returns the histogram type as a readable string.
Definition: equi_height.cc:496
static Equi_height< T > * create(MEM_ROOT *mem_root, const std::string &db_name, const std::string &tbl_name, const std::string &col_name, Value_map_type data_type)
Equi-height histogram factory method.
Definition: equi_height.cc:66
static constexpr const char * equi_height_str()
String representation of the histogram type EQUI-HEIGHT.
Definition: equi_height.h:243
double get_equal_to_selectivity(const T &value) const
Find the fraction of values equal to 'value'.
Definition: equi_height.cc:682
const Mem_root_array< equi_height::Bucket< T > > & get_buckets() const
Get the buckets of the histogram.
Definition: equi_height.h:218
double get_greater_than_selectivity(const T &value) const
Find the fraction of values that is greater than 'value'.
Definition: equi_height.cc:770
size_t get_num_distinct_values() const override
Get the estimated number of distinct non-NULL values.
Definition: equi_height.cc:673
bool add_bucket_from_json(const Json_array *json_bucket, Error_context *context)
Create Equi-height buckets from a JSON array.
Definition: equi_height.cc:565
bool build_histogram(const Value_map< T > &value_map, size_t num_buckets)
Build the histogram.
Definition: equi_height.cc:349
bool json_to_histogram(const Json_object &json_object, Error_context *context) override
Populate this histogram with contents from a JSON object.
Definition: equi_height.cc:501
Error context to validate given JSON object which represents a histogram.
Definition: histogram.h:209
Histogram base class.
Definition: histogram.h:314
Value_map class.
Definition: value_map.h:264
Equi-height bucket.
Definition: equi_height_bucket.h:54
static MEM_ROOT mem_root
Definition: client_plugin.cc:114
Equi-height bucket.
Histogram base class.
Definition: column_statistics.h:34
Value_map_type
Datatypes that a Value_map and histogram can hold (including the invalid type).
Definition: value_map_type.h:33
const char * db_name
Definition: rules_table_service.cc:55
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:83