MySQL 9.0.0
Source Code Documentation
singleton.h
Go to the documentation of this file.
1#ifndef HISTOGRAMS_SINGLETON_INCLUDED
2#define HISTOGRAMS_SINGLETON_INCLUDED
3
4/* Copyright (c) 2016, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file sql/histograms/singleton.h
29 Singleton histogram.
30
31 This file defines the Singleton histogram. A Singleton histogram is a
32 histogram where only a value and its frequency are stored. It allows us to
33 use less storage space, as well as to estimate selectivity a bit more
34 efficiently.
35
36 A singleton histogram, when converted to a JSON object, follows this
37 "schema":
38
39 {
40 // Last time the histogram was updated. As of now, this means "when the
41 // histogram was created" (incremental updates are not supported). Date/time
42 // is given in UTC.
43 // -- J_DATETIME
44 "last-updated": "2015-11-04 15:19:51.000000",
45
46 // Histogram type. Always "singleton" for singleton histograms.
47 // -- J_STRING
48 "histogram-type": "singleton",
49
50 // Fraction of NULL values. This is the total fraction of NULL values in the
51 // original data set.
52 // -- J_DOUBLE
53 "null-values": 0.1,
54
55 // Histogram buckets. May be an empty array, if for instance the source
56 // only contains NULL values.
57 // -- J_ARRAY
58 "buckets":
59 [
60 [
61 // Value
62 // -- Data type depends on the source column.
63 42,
64
65 // Cumulative frequency
66 // -- J_DOUBLE
67 0.001978728666831561
68 ]
69 ]
70 }
71*/
72
73#include <stddef.h>
74#include <string> // std::string
75
76#include "my_inttypes.h"
77#include "mysql_time.h"
79#include "sql/histograms/histogram.h" // Histogram, Histogram_comparator,
82#include "sql/mem_root_array.h"
83#include "sql_string.h"
84
85class Json_array;
86class Json_object;
87struct MEM_ROOT;
88
89namespace histograms {
90
91/**
92 Singleton histogram.
93
94 Singleton histograms do not have a public constructor, but are instead created
95 through the factory method Singleton<T>::create() and returned by pointer.
96 This is done to ensure that we can return nullptr in case memory allocations
97 carried out during construction fail.
98
99 Likewise, the Singleton class does not have a public copy constructor, but
100 instead implements a clone() method that returns nullptr in case of failure.
101*/
102struct Histogram_comparator;
103template <class T>
104class Value_map;
105
106template <class T>
112};
113
114template <class T>
115class Singleton : public Histogram {
116 public:
117 /**
118 Singleton histogram factory method.
119
120 Attempts to allocate and initialize a singleton histogram on the supplied
121 mem_root. This will not build the histogram, but only set its properties.
122 If the attempt to allocate the histogram fails or if an error occurs during
123 construction we return nullptr.
124
125 @param mem_root the mem_root where the histogram contents will be allocated
126 @param db_name name of the database this histogram represents
127 @param tbl_name name of the table this histogram represents
128 @param col_name name of the column this histogram represents
129 @param data_type the type of data that this histogram contains
130
131 @return A pointer to a Singleton histogram on success. Returns nullptr on
132 error.
133 */
134 static Singleton<T> *create(MEM_ROOT *mem_root, const std::string &db_name,
135 const std::string &tbl_name,
136 const std::string &col_name,
137 Value_map_type data_type);
138
139 /**
140 Make a clone of this histogram on a MEM_ROOT.
141
142 @param mem_root the MEM_ROOT to allocate the new histogram contents on.
143
144 @return a copy allocated on the provided MEM_ROOT, or nullptr on failure.
145 */
146 Histogram *clone(MEM_ROOT *mem_root) const override;
147
148 Singleton(const Singleton<T> &other) = delete;
149
150 /**
151 Build the Singleton histogram.
152
153 @param value_map values to create the histogram for
154 @param num_buckets the number of buckets specified/requested by the user
155
156 @return true on error, false otherwise
157 */
158 bool build_histogram(const Value_map<T> &value_map, size_t num_buckets);
159
160 /**
161 Convert this histogram to a JSON object.
162
163 This function will take the contents of the current histogram and put
164 it in the output parameter "json_object".
165
166 @param[in,out] json_object output where the histogram is to be stored. The
167 caller is responsible for allocating/deallocating the JSON
168 object
169
170 @return true on error, false otherwise
171 */
172 bool histogram_to_json(Json_object *json_object) const override;
173
174 /**
175 @return number of values/buckets in this histogram
176 */
177 size_t get_num_buckets() const override { return m_buckets.size(); }
178
179 /**
180 Get the estimated number of distinct non-NULL values.
181 @return number of distinct non-NULL values
182
183 TODO(christiani): If the histogram is based on sampling, then this estimate
184 is potentially off by a factor 1/sampling_rate. It should be adjusted to an
185 actual estimate if we are going to use it.
186 */
187 size_t get_num_distinct_values() const override { return get_num_buckets(); }
188
189 /**
190 Returns the histogram type as a readable string.
191
192 @return a readable string representation of the histogram type
193 */
194 std::string histogram_type_to_str() const override;
195
196 /**
197 Find the number of values equal to 'value'.
198
199 This function estimates the fraction of values that are equal to the
200 provided value.
201
202 @param value The value to estimate the selectivity for.
203
204 @return the selectivity between 0.0 and 1.0 inclusive.
205 */
206 double get_equal_to_selectivity(const T &value) const;
207
208 /**
209 Find the number of values less than 'value'.
210
211 This function estimates the fraction of values that are less than the
212 provided value.
213
214 @param value The value to estimate the selectivity for.
215
216 @return the selectivity between 0.0 and 1.0 inclusive.
217 */
218 double get_less_than_selectivity(const T &value) const;
219
220 /**
221 Find the number of values greater than 'value'.
222
223 This function estimates the fraction of values that are greater than the
224 provided value.
225
226 @param value The value to estimate the selectivity for.
227
228 @return the selectivity between 0.0 and 1.0 inclusive.
229 */
230 double get_greater_than_selectivity(const T &value) const;
231
232 protected:
233 /**
234 Populate this histogram with contents from a JSON object.
235
236 @param json_object a JSON object that represents a Singleton histogram
237 @param context error context for validation
238
239 @return true on error, false otherwise.
240 */
241 bool json_to_histogram(const Json_object &json_object,
242 Error_context *context) override;
243
244 private:
245 /// String representation of the histogram type SINGLETON.
246 static constexpr const char *singleton_str() { return "singleton"; }
247
248 /**
249 Singleton constructor.
250
251 This will not build the histogram, but only set its properties.
252
253 @param mem_root the mem_root where the histogram contents will be allocated
254 @param db_name name of the database this histogram represents
255 @param tbl_name name of the table this histogram represents
256 @param col_name name of the column this histogram represents
257 @param data_type the type of data that this histogram contains
258 @param[out] error is set to true if an error occurs
259 */
260 Singleton(MEM_ROOT *mem_root, const std::string &db_name,
261 const std::string &tbl_name, const std::string &col_name,
262 Value_map_type data_type, bool *error);
263
264 /**
265 Singleton copy-constructor
266
267 This will take a copy of the histogram and all of its contents on the
268 provided MEM_ROOT.
269
270 @param mem_root the MEM_ROOT to allocate the new histogram on.
271 @param other the histogram to take a copy of
272 @param[out] error is set to true if an error occurs
273 */
274 Singleton(MEM_ROOT *mem_root, const Singleton<T> &other, bool *error);
275
276 /**
277 Add value to a JSON bucket
278
279 This function adds the value to the supplied JSON array.
280
281 @param value the value to add
282 @param[out] json_bucket a JSON array where the bucket data is to be stored
283
284 @return true on error, false otherwise
285 */
286 static bool add_value_json_bucket(const T &value, Json_array *json_bucket);
287
288 /**
289 Convert one bucket to its JSON array representation.
290
291 @param bucket the histogram bucket to convert
292 @param[out] json_bucket a JSON array where the bucket data is to be stored
293
294 @return true on error, false otherwise
295 */
296 static bool create_json_bucket(const SingletonBucket<T> &bucket,
297 Json_array *json_bucket);
298
299 /// The buckets for this histogram [value, cumulative frequency].
300 Mem_root_array<SingletonBucket<T>> m_buckets;
301};
302
303} // namespace histograms
304
305#endif
Represents a JSON array container, i.e. type J_ARRAY here.
Definition: json_dom.h:516
Represents a JSON container value of type "object" (ECMA), type J_OBJECT here.
Definition: json_dom.h:369
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:426
Error context to validate given JSON object which represents a histogram.
Definition: histogram.h:209
Histogram base class.
Definition: histogram.h:314
Definition: singleton.h:115
static Singleton< T > * create(MEM_ROOT *mem_root, const std::string &db_name, const std::string &tbl_name, const std::string &col_name, Value_map_type data_type)
Singleton histogram factory method.
Definition: singleton.cc:61
size_t get_num_buckets() const override
Definition: singleton.h:177
double get_less_than_selectivity(const T &value) const
Find the number of values less than 'value'.
Definition: singleton.cc:412
bool build_histogram(const Value_map< T > &value_map, size_t num_buckets)
Build the Singleton histogram.
Definition: singleton.cc:90
bool json_to_histogram(const Json_object &json_object, Error_context *context) override
Populate this histogram with contents from a JSON object.
Definition: singleton.cc:265
size_t get_num_distinct_values() const override
Get the estimated number of distinct non-NULL values.
Definition: singleton.h:187
double get_equal_to_selectivity(const T &value) const
Find the number of values equal to 'value'.
Definition: singleton.cc:389
Mem_root_array< SingletonBucket< T > > m_buckets
The buckets for this histogram [value, cumulative frequency].
Definition: singleton.h:300
static bool create_json_bucket(const SingletonBucket< T > &bucket, Json_array *json_bucket)
Convert one bucket to a JSON object.
Definition: singleton.cc:173
bool histogram_to_json(Json_object *json_object) const override
Convert this histogram to a JSON object.
Definition: singleton.cc:145
double get_greater_than_selectivity(const T &value) const
Find the number of values greater than 'value'.
Definition: singleton.cc:428
Singleton(const Singleton< T > &other)=delete
static bool add_value_json_bucket(const T &value, Json_array *json_bucket)
Add value to a JSON bucket.
std::string histogram_type_to_str() const override
Returns the histogram type as a readable string.
Definition: singleton.cc:260
Histogram * clone(MEM_ROOT *mem_root) const override
Make a clone of this histogram on a MEM_ROOT.
Definition: singleton.cc:380
static constexpr const char * singleton_str()
String representation of the histogram type SINGLETON.
Definition: singleton.h:246
Value_map class.
Definition: value_map.h:264
static MEM_ROOT mem_root
Definition: client_plugin.cc:114
Histogram base class.
It is an interface module to the fixed precision decimals library.
Some integer typedefs for easier portability.
Time declarations shared between the server and client API: you should not add anything to this heade...
Definition: column_statistics.h:34
Value_map_type
Datatypes that a Value_map and histogram can hold (including the invalid type).
Definition: value_map_type.h:33
const char * db_name
Definition: rules_table_service.cc:55
Our own string classes, used pervasively throughout the executor.
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:83
Definition: completion_hash.h:40
Definition: singleton.h:107
SingletonBucket(T value, double cumulative_frequency)
Definition: singleton.h:110
T value
Definition: singleton.h:108
double cumulative_frequency
Definition: singleton.h:109