MySQL 8.3.0
Source Code Documentation
histogram.h
Go to the documentation of this file.
1#ifndef HISTOGRAMS_HISTOGRAM_INCLUDED
2#define HISTOGRAMS_HISTOGRAM_INCLUDED
3
4/* Copyright (c) 2016, 2023, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 @file sql/histograms/histogram.h
28 Histogram base class.
29
30 This file defines the base class for all histogram types. We keep the base
31 class itself non-templatized in order to more easily send a histogram as an
32 argument, collect multiple histograms in a single collection etc.
33
34 A histogram is stored as a JSON object. This gives the flexibility of storing
35 a virtually unlimited number of buckets, data values in their full length and
36 easily expanding with new histogram types in the future. Histograms are stored
37 persistently in the system table mysql.column_stats.
38
39 We keep all histogram code in the namespace "histograms" in order to avoid
40 name conflicts etc.
41*/
42
43#include <cstddef> // size_t
44#include <functional>
45#include <map> // std::map
46#include <memory>
47#include <set> // std::set
48#include <string> // std::string
49#include <utility> // std::pair
50
51#include "lex_string.h" // LEX_CSTRING
52#include "my_base.h" // ha_rows
53#include "sql/field.h" // Field
55#include "sql/mem_root_allocator.h" // Mem_root_allocator
56#include "sql/stateless_allocator.h" // Stateless_allocator
57
58class Item;
59class Json_dom;
60class Json_object;
61class THD;
62struct TYPELIB;
63class Field;
64
65namespace dd {
66class Table;
67} // namespace dd
68namespace histograms {
69struct Histogram_comparator;
70template <class T>
71class Value_map;
72} // namespace histograms
73struct CHARSET_INFO;
74struct MEM_ROOT;
75class Table_ref;
76class Json_dom;
77
78namespace histograms {
79
80/// The default (and invalid) value for "m_null_values_fraction".
81static const double INVALID_NULL_VALUES_FRACTION = -1.0;
82
83enum class Message {
88 VIEW,
96
97 // JSON validation errors. See Error_context.
122};
123
125 void *operator()(size_t s) const;
126};
127
128template <class T>
130
131template <class T>
133
134template <typename T>
136 std::map<T, ha_rows, Histogram_comparator, value_map_allocator<T>>;
137
138using columns_set = std::set<std::string, std::less<std::string>,
140
141// Used as an array, so duplicate values are not checked.
142// TODO(tlchrist): Convert this std::map to an array.
144 std::map<std::string, Message, std::less<std::string>,
146
147/**
148 The different operators we can ask histogram statistics for selectivity
149 estimations.
150*/
151enum class enum_operator {
152 EQUALS_TO,
154 LESS_THAN,
155 IS_NULL,
160 BETWEEN,
162 IN_LIST,
164};
165
166/**
167 Error context to validate given JSON object which represents a histogram.
168
169 A validation error consists of two pieces of information:
170
171 1) error code - what kind of error it is
172 2) JSON path - where the error occurs
173
174 Errors are classified into a few conceptual categories, namely
175
176 1) absence of required attributes
177 2) unexpected JSON type of attributes
178 3) value encoding corruption
179 4) value out of domain
180 5) breaking bucket sequence semantics
181 6) breaking certain constraint between pieces of information
182
183 @see histograms::Message for the list of JSON validation errors.
184
185 Use of the Error_context class
186 ------------------------------
187
188 An Error_context object is passed along with other parameters to the
189 json_to_histogram() function that is used to create a histogram object (e.g.
190 Equi_height<longlong>) from a JSON string.
191
192 The json_to_histogram() function has two different use cases, with different
193 requirements for validation:
194
195 1) Deserializing a histogram that was retrieved from the dictionary. In this
196 case the histogram has already been validated, and the user is not
197 expecting validation feedback, so we pass along a default-constructed
198 "empty shell" Error_context object with no-op operations.
199
200 2) When validating the user-supplied JSON string to the UPDATE HISTOGRAM ...
201 USING DATA command. In this case we pass along an active Error_context
202 object that uses a Field object to validate bucket values, and stores
203 results in a results_map.
204
205 The binary() method is used to distinguish between these two contexts/cases.
206*/
208 public:
209 /// Default constructor. Used when deserializing binary JSON that has already
210 /// been validated, e.g. when retrieving a histogram from the dictionary, and
211 /// the Error_context object is not actively used for validation.
214
215 /**
216 Constructor. Used in the context of deserializing the user-supplied JSON
217 string to the UPDATE HISTOGRAM ... USING DATA command.
218
219 @param thd Thread context
220 @param field The field for values on which the histogram is built
221 @param results Where reported errors are stored
222 */
224 : m_thd(thd), m_field(field), m_results(results), m_binary(false) {}
225
226 /**
227 Report a global error to this context.
228
229 @param err_code The global error code
230 */
231 void report_global(Message err_code);
232
233 /**
234 Report to this context that a required attribute is missing.
235
236 @param name Name of the missing attribute
237 */
238 void report_missing_attribute(const std::string &name);
239
240 /**
241 Report to this context that an error occurs on the given dom node.
242
243 @param dom The given dom node
244 @param err_code The error code
245 */
246 void report_node(const Json_dom *dom, Message err_code);
247
248 /**
249 Check if the value is in the field definition domain.
250
251 @param v Pointer to the value.
252
253 @return true on error, false otherwise
254
255 @note Uses Field::store() on the field for which the user-defined histogram
256 is to be constructed in order to check the validity of the supplied value.
257 This will have the side effect of writing to the record buffer so this
258 should only be used with an active Error_context (with a non-nullptr field)
259 when we do not otherwise expect to use the record buffer. Currently the only
260 use case is to validate the JSON input to the command UPDATE HISTOGRAM ...
261 USING DATA where it should be OK to use the field for this purpose.
262 */
263 template <typename T>
264 bool check_value(T *v);
265
266 /**
267 Tell whether the input JSON is an internal persisted copy or
268 a user-defined input. If the input is an internal copy, there
269 should never be type/format errors. If it is a user-defined input,
270 errors may occur and should be handled, and some type casting may
271 be needed.
272
273 @return true for binary JSON (internal persisted copy), false otherwise
274 */
275 bool binary() const { return m_binary; }
276
277 /**
278 Return the field in this context, if present. Used to enforce
279 that histogram datatype matches column datatype for user-defined
280 histograms.
281
282 @return the field if present, nullptr if not
283 */
284 Field *field() const { return m_field; }
285
286 private:
287 /// Thread context for error handlers
289 /// The field for checking endpoint values
291 /// Where reported errors are stored
293 /// Whether or not the JSON object to process is in binary format
295};
296
297/**
298 Histogram base class.
299
300 This is an abstract class containing the interface and shared code for
301 concrete histogram subclasses.
302
303 Histogram subclasses (Singleton, Equi_height) are constructed through factory
304 methods in order to catch memory allocation errors during construction.
305
306 The histogram subclasses have no public copy or move constructors. In order to
307 copy a histogram onto a given MEM_ROOT, use the public clone method. The clone
308 method ensures that members of the histogram, such as String type buckets,
309 are also allocated on the given MEM_ROOT. Modifications to these methods need
310 to be careful that histogram buckets are cloned/copied correctly.
311*/
313 public:
314 /// All supported histogram types in MySQL.
316
317 /// String representation of the JSON field "histogram-type".
318 static constexpr const char *histogram_type_str() { return "histogram-type"; }
319
320 /// String representation of the JSON field "data-type".
321 static constexpr const char *data_type_str() { return "data-type"; }
322
323 /// String representation of the JSON field "collation-id".
324 static constexpr const char *collation_id_str() { return "collation-id"; }
325
326 /// String representation of the histogram type SINGLETON.
327 static constexpr const char *singleton_str() { return "singleton"; }
328
329 /// String representation of the histogram type EQUI-HEIGHT.
330 static constexpr const char *equi_height_str() { return "equi-height"; }
331
332 protected:
334
335 /// The fraction of NULL values in the histogram (between 0.0 and 1.0).
337
338 /// The character set for the data stored
340
341 /// The number of buckets originally specified
343
344 /// String representation of the JSON field "buckets".
345 static constexpr const char *buckets_str() { return "buckets"; }
346
347 /// String representation of the JSON field "last-updated".
348 static constexpr const char *last_updated_str() { return "last-updated"; }
349
350 /// String representation of the JSON field "null-values".
351 static constexpr const char *null_values_str() { return "null-values"; }
352
353 static constexpr const char *sampling_rate_str() { return "sampling-rate"; }
354
355 /// String representation of the JSON field "number-of-buckets-specified".
356 static constexpr const char *numer_of_buckets_specified_str() {
357 return "number-of-buckets-specified";
358 }
359
360 /**
361 Constructor.
362
363 @param mem_root the mem_root where the histogram contents will be allocated
364 @param db_name name of the database this histogram represents
365 @param tbl_name name of the table this histogram represents
366 @param col_name name of the column this histogram represents
367 @param type the histogram type (equi-height, singleton)
368 @param data_type the type of data that this histogram contains
369 @param[out] error is set to true if an error occurs
370 */
371 Histogram(MEM_ROOT *mem_root, const std::string &db_name,
372 const std::string &tbl_name, const std::string &col_name,
373 enum_histogram_type type, Value_map_type data_type, bool *error);
374
375 /**
376 Copy constructor
377
378 This will make a copy of the provided histogram onto the provided MEM_ROOT.
379
380 @param mem_root the mem_root where the histogram contents will be allocated
381 @param other the histogram to copy
382 @param[out] error is set to true if an error occurs
383 */
384 Histogram(MEM_ROOT *mem_root, const Histogram &other, bool *error);
385
386 /**
387 Write the data type of this histogram into a JSON object.
388
389 @param json_object the JSON object where we will write the histogram
390 data type
391
392 @return true on error, false otherwise
393 */
394 bool histogram_data_type_to_json(Json_object *json_object) const;
395
396 /**
397 Return the value that is contained in the JSON DOM object.
398
399 For most types, this function simply returns the contained value. For String
400 values, the value is allocated on this histogram's MEM_ROOT before it is
401 returned. This allows the String value to survive the entire lifetime of the
402 histogram object.
403
404 @param json_dom the JSON DOM object to extract the value from
405 @param[out] out the value extracted from the JSON DOM object
406 @param context error context for validation
407
408 @return true on error, false otherwise
409 */
410 template <class T>
411 bool extract_json_dom_value(const Json_dom *json_dom, T *out,
412 Error_context *context);
413
414 /**
415 Populate the histogram with data from the provided JSON object. The base
416 class also provides an implementation that subclasses must call in order
417 to populate fields that are shared among all histogram types (character set,
418 null values fraction).
419
420 @param json_object the JSON object to read the histogram data from
421 @param context error context for validation
422
423 @return true on error, false otherwise
424 */
425 virtual bool json_to_histogram(const Json_object &json_object,
426 Error_context *context) = 0;
427
428 private:
429 /// The MEM_ROOT where the histogram contents will be allocated.
431
432 /// The type of this histogram.
434
435 /// The type of the data this histogram contains.
437
438 /// Name of the database this histogram represents.
440
441 /// Name of the table this histogram represents.
443
444 /// Name of the column this histogram represents.
446
447 /**
448 An internal function for getting a selectivity estimate prior to adjustment.
449 @see get_selectivity() for details.
450 */
451 bool get_raw_selectivity(Item **items, size_t item_count, enum_operator op,
452 double *selectivity) const;
453
454 /**
455 An internal function for getting the selectivity estimation.
456
457 This function will read/evaluate the value from the given Item, and pass
458 this value on to the correct selectivity estimation function based on the
459 data type of the histogram. For instance, if the data type of the histogram
460 is INT, we will call "val_int" on the Item to evaluate the value as an
461 integer and pass this value on to the next function.
462
463 @param item The Item to read/evaluate the value from.
464 @param op The operator we are estimating the selectivity for.
465 @param typelib In the case of ENUM or SET data type, this parameter holds
466 the type information. This is needed in order to map a
467 string representation of an ENUM/SET value into its correct
468 integer representation (ENUM/SET values are stored as
469 integer values in the histogram).
470 @param[out] selectivity The estimated selectivity, between 0.0 and 1.0
471 inclusive.
472
473 @return true on error (i.e. the provided item was NULL), false on success.
474 */
475 bool get_selectivity_dispatcher(Item *item, const enum_operator op,
476 const TYPELIB *typelib,
477 double *selectivity) const;
478
479 /**
480 An internal function for getting the selectivity estimation.
481
482 This function will cast the histogram to the correct class (using down_cast)
483 and pass the given value on to the correct selectivity estimation function
484 for that class.
485
486 @param value The value to estimate the selectivity for.
487
488 @return The estimated selectivity, between 0.0 and 1.0 inclusive.
489 */
490 template <class T>
491 double get_less_than_selectivity_dispatcher(const T &value) const;
492
493 /// @see get_less_than_selectivity_dispatcher
494 template <class T>
495 double get_greater_than_selectivity_dispatcher(const T &value) const;
496
497 /// @see get_less_than_selectivity_dispatcher
498 template <class T>
499 double get_equal_to_selectivity_dispatcher(const T &value) const;
500
501 /**
502 An internal function for applying the correct function for the given
503 operator.
504
505 @param op The operator to apply
506 @param value The value to find the selectivity for.
507
508 @return The estimated selectivity, between 0.0 and 1.0 inclusive.
509 */
510 template <class T>
511 double apply_operator(const enum_operator op, const T &value) const;
512
513 public:
514 Histogram() = delete;
515 Histogram(const Histogram &other) = delete;
516
517 /// Destructor.
518 virtual ~Histogram() = default;
519
520 /// @return the MEM_ROOT that this histogram uses for allocations
521 MEM_ROOT *get_mem_root() const { return m_mem_root; }
522
523 /**
524 @return name of the database this histogram represents
525 */
527
528 /**
529 @return name of the table this histogram represents
530 */
531 const LEX_CSTRING get_table_name() const { return m_table_name; }
532
533 /**
534 @return name of the column this histogram represents
535 */
536 const LEX_CSTRING get_column_name() const { return m_column_name; }
537
538 /**
539 @return type of this histogram
540 */
542
543 /**
544 @return the fraction of NULL values, in the range [0.0, 1.0]
545 */
546 double get_null_values_fraction() const;
547
548 /// @return the character set for the data this histogram contains
549 const CHARSET_INFO *get_character_set() const { return m_charset; }
550
551 /// @return the sampling rate used to generate this histogram
552 double get_sampling_rate() const { return m_sampling_rate; }
553
554 /**
555 Returns the histogram type as a readable string.
556
557 @return a readable string representation of the histogram type
558 */
559 virtual std::string histogram_type_to_str() const = 0;
560
561 /**
562 @return number of buckets in this histogram
563 */
564 virtual size_t get_num_buckets() const = 0;
565
566 /**
567 Get the estimated number of distinct non-NULL values.
568 @return number of distinct non-NULL values
569 */
570 virtual size_t get_num_distinct_values() const = 0;
571
572 /**
573 @return the data type that this histogram contains
574 */
576
577 /**
578 @return number of buckets originally specified by the user. This may be
579 higher than the actual number of buckets in the histogram.
580 */
582
583 /**
584 Converts the histogram to a JSON object.
585
586 @param[in,out] json_object output where the histogram is to be stored. The
587 caller is responsible for allocating/deallocating the JSON
588 object
589
590 @return true on error, false otherwise
591 */
592 virtual bool histogram_to_json(Json_object *json_object) const = 0;
593
594 /**
595 Converts JSON object to a histogram.
596
597 @param mem_root MEM_ROOT where the histogram will be allocated
598 @param schema_name the schema name
599 @param table_name the table name
600 @param column_name the column name
601 @param json_object input JSON object that the histogram is read from
602 @param context error context for validation
603
604 @return nullptr on error. Otherwise a histogram allocated on the provided
605 MEM_ROOT.
606 */
608 const std::string &schema_name,
609 const std::string &table_name,
610 const std::string &column_name,
611 const Json_object &json_object,
612 Error_context *context);
613
614 /**
615 Make a clone of the current histogram
616
617 @param mem_root the MEM_ROOT on which the new histogram will be allocated.
618
619 @return a histogram allocated on the provided MEM_ROOT. Returns nullptr
620 on error.
621 */
622 virtual Histogram *clone(MEM_ROOT *mem_root) const = 0;
623
624 /**
625 Store this histogram to persistent storage (data dictionary).
626
627 @param thd Thread handler.
628
629 @return false on success, true on error.
630 */
631 bool store_histogram(THD *thd) const;
632
633 /**
634 Get selectivity estimation.
635
636 This function will try and get the selectivity estimation for a predicate
637 of the form "COLUMN OPERATOR CONSTANT", for instance "SELECT * FROM t1
638 WHERE col1 > 23;".
639
640 This function will take care of several things, for instance checking
641 that the value we are estimating the selectivity for is a constant value.
642
643 The order of the Items provided does not matter. For instance, if the
644 operator argument given is "EQUALS_TO", it does not matter if the constant
645 value is provided as the first or the second argument; this function will
646 take care of this.
647
648 @param items an array of items that contains both the field we
649 are estimating the selectivity for, as well as the
650 user-provided constant values.
651 @param item_count the number of Items in the Item array.
652 @param op the predicate operator
653 @param[out] selectivity the calculated selectivity if a usable histogram was
654 found
655
656 @retval true if an error occurred (the Item provided was not a constant
657 value or similar).
658 @retval false on success
659 */
660 bool get_selectivity(Item **items, size_t item_count, enum_operator op,
661 double *selectivity) const;
662
663 /**
664 @return the fraction of non-null values in the histogram.
665 */
667 return 1.0 - get_null_values_fraction();
668 }
669};
670
671/** Return true if 'histogram' was built on an empty table, i.e. it reports zero distinct values and a NULL values fraction of 0.0. */
672inline bool empty(const Histogram &histogram) {
673 return histogram.get_num_distinct_values() == 0 &&
674 histogram.get_null_values_fraction() == 0.0;
675}
676
677/**
678 Create a histogram from a value map.
679
680 This function will build a histogram from a value map. The histogram type
681 depends on both the size of the input data, as well as the number of buckets
682 specified. If the number of distinct values is less than or equal to the
683 number of buckets, a Singleton histogram will be created. Otherwise, an
684 equi-height histogram will be created.
685
686 The histogram will be allocated on the supplied mem_root, and it is the
687 caller's responsibility to properly clean up when the histogram isn't needed
688 anymore.
689
690 @param mem_root the MEM_ROOT where the histogram contents will be
691 allocated
692 @param value_map a value map containing [value, frequency]
693 @param num_buckets the maximum number of buckets to create
694 @param db_name name of the database this histogram represents
695 @param tbl_name name of the table this histogram represents
696 @param col_name name of the column this histogram represents
697
698 @return a histogram, using at most "num_buckets" buckets. The histogram
699 type depends on the size of the input data, and the number of
700 buckets
701*/
702template <class T>
703Histogram *build_histogram(MEM_ROOT *mem_root, const Value_map<T> &value_map,
704 size_t num_buckets, const std::string &db_name,
705 const std::string &tbl_name,
706 const std::string &col_name);
707
708/**
709 Create or update histograms for a set of columns of a given table.
710
711 This function will try to create histogram statistics for all the columns
712 specified. If one of the columns fails, it will continue to the next one and
713 try that.
714
715 @param thd Thread handler.
716 @param table The table where we should look for the columns/data.
717 @param columns Columns specified by the user.
718 @param num_buckets The maximum number of buckets to create in each
719 histogram.
720 @param data The histogram JSON literal for update
721 @param results A map where the result of each operation is stored.
722
723 @return false on success, true on error.
724*/
725bool update_histogram(THD *thd, Table_ref *table, const columns_set &columns,
726 int num_buckets, LEX_STRING data, results_map &results);
727
728/**
729 Drop histograms for all columns in a given table.
730
731 @param thd Thread handler.
732 @param table The table where we should look for the columns.
733 @param original_table_def Original table definition.
734 @param results A map where the result of each operation is stored.
735
736 @note Assumes that caller owns exclusive metadata lock on the table,
737 so there is no need to lock individual statistics.
738
739 @return false on success, true on error.
740*/
742 const dd::Table &original_table_def,
743 results_map &results);
744
745/**
746 Drop histograms for a set of columns in a given table.
747
748 This function will try to drop the histogram statistics for all specified
749 columns. If one of the columns fails, it will continue to the next one and try.
750
751 @param thd Thread handler.
752 @param table The table where we should look for the columns.
753 @param columns Columns specified by the user.
754 @param results A map where the result of each operation is stored.
755
756 @note Assumes that the caller has the appropriate metadata locks on both the
757 table and column statistics. That can either be an exclusive metadata lock on
758 the table itself, or a shared metadata lock on the table combined with
759 exclusive locks on individual column statistics.
760
761 @return false on success, true on error.
762*/
763bool drop_histograms(THD *thd, Table_ref &table, const columns_set &columns,
764 results_map &results);
765
766/**
767 Rename histograms for all columns in a given table.
768
769 @param thd Thread handler.
770 @param old_schema_name The old schema name
771 @param old_table_name The old table name
772 @param new_schema_name The new schema name
773 @param new_table_name The new table name
774 @param results A map where the result of each operation is stored.
775
776 @return false on success, true on error.
777*/
778bool rename_histograms(THD *thd, const char *old_schema_name,
779 const char *old_table_name, const char *new_schema_name,
780 const char *new_table_name, results_map &results);
781
782bool find_histogram(THD *thd, const std::string &schema_name,
783 const std::string &table_name,
784 const std::string &column_name,
785 const Histogram **histogram);
786} // namespace histograms
787
788#endif
Kerberos Client Authentication nullptr
Definition: auth_kerberos_client_plugin.cc:250
Definition: field.h:574
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:933
JSON DOM abstract base class.
Definition: json_dom.h:171
Represents a JSON container value of type "object" (ECMA), type J_OBJECT here.
Definition: json_dom.h:367
Mem_root_allocator is a C++ STL memory allocator based on MEM_ROOT.
Definition: mem_root_allocator.h:67
Stateless_allocator is a C++ STL memory allocator skeleton based on Malloc_allocator,...
Definition: stateless_allocator.h:91
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:35
Definition: table.h:2853
Definition: table.h:46
Error context to validate given JSON object which represents a histogram.
Definition: histogram.h:207
Field * m_field
The field for checking endpoint values.
Definition: histogram.h:290
void report_node(const Json_dom *dom, Message err_code)
Report to this context that an error occurs on the given dom node.
Definition: histogram.cc:238
Error_context()
Default constructor.
Definition: histogram.h:212
Error_context(THD *thd, Field *field, results_map *results)
Constructor.
Definition: histogram.h:223
Field * field() const
Return data-type of field in context if present.
Definition: histogram.h:284
results_map * m_results
Where reported errors are stored.
Definition: histogram.h:292
bool m_binary
Whether or not the JSON object to process is in binary format.
Definition: histogram.h:294
THD * m_thd
Thread context for error handlers.
Definition: histogram.h:288
void report_missing_attribute(const std::string &name)
Report to this context that a required attribute is missing.
Definition: histogram.cc:226
bool check_value(T *v)
Check if the value is in the field definition domain.
Definition: histogram.cc:310
void report_global(Message err_code)
Report a global error to this context.
Definition: histogram.cc:214
bool binary() const
Tell whether the input json is an internal persisted copy or a user-defined input.
Definition: histogram.h:275
Histogram base class.
Definition: histogram.h:312
bool extract_json_dom_value(const Json_dom *json_dom, T *out, Error_context *context)
Return the value that is contained in the JSON DOM object.
virtual std::string histogram_type_to_str() const =0
Returns the histogram type as a readable string.
size_t m_num_buckets_specified
The number of buckets originally specified.
Definition: histogram.h:342
Value_map_type get_data_type() const
Definition: histogram.h:575
double get_equal_to_selectivity_dispatcher(const T &value) const
Definition: histogram.cc:1699
virtual size_t get_num_buckets() const =0
MEM_ROOT * m_mem_root
The MEM_ROOT where the histogram contents will be allocated.
Definition: histogram.h:430
static constexpr const char * data_type_str()
String representation of the JSON field "data-type".
Definition: histogram.h:321
double m_sampling_rate
Definition: histogram.h:333
static constexpr const char * collation_id_str()
String representation of the JSON field "collation-id".
Definition: histogram.h:324
double get_less_than_selectivity_dispatcher(const T &value) const
An internal function for getting the selecitvity estimation.
Definition: histogram.cc:1660
static constexpr const char * buckets_str()
String representation of the JSON field "buckets".
Definition: histogram.h:345
static constexpr const char * numer_of_buckets_specified_str()
String representation of the JSON field "number-of-buckets-specified".
Definition: histogram.h:356
virtual ~Histogram()=default
Destructor.
virtual size_t get_num_distinct_values() const =0
Get the estimated number of distinct non-NULL values.
double get_sampling_rate() const
Definition: histogram.h:552
const enum_histogram_type m_hist_type
The type of this histogram.
Definition: histogram.h:433
virtual bool histogram_to_json(Json_object *json_object) const =0
Converts the histogram to a JSON object.
Definition: histogram.cc:387
const CHARSET_INFO * m_charset
The character set for the data stored.
Definition: histogram.h:339
static constexpr const char * last_updated_str()
String representation of the JSON field "last-updated".
Definition: histogram.h:348
LEX_CSTRING m_table_name
Name of the table this histogram represents.
Definition: histogram.h:442
Histogram(const Histogram &other)=delete
size_t get_num_buckets_specified() const
Definition: histogram.h:581
bool get_raw_selectivity(Item **items, size_t item_count, enum_operator op, double *selectivity) const
An internal function for getting a selectivity estimate prior to adustment.
Definition: histogram.cc:1933
static constexpr const char * equi_height_str()
String representation of the histogram type EQUI-HEIGHT.
Definition: histogram.h:330
double get_non_null_values_fraction() const
Definition: histogram.h:666
virtual Histogram * clone(MEM_ROOT *mem_root) const =0
Make a clone of the current histogram.
bool get_selectivity(Item **items, size_t item_count, enum_operator op, double *selectivity) const
Get selectivity estimation.
Definition: histogram.cc:1887
double m_null_values_fraction
The fraction of NULL values in the histogram (between 0.0 and 1.0).
Definition: histogram.h:336
const Value_map_type m_data_type
The type of the data this histogram contains.
Definition: histogram.h:436
const LEX_CSTRING get_database_name() const
Definition: histogram.h:526
LEX_CSTRING m_column_name
Name of the column this histogram represents.
Definition: histogram.h:445
bool get_selectivity_dispatcher(Item *item, const enum_operator op, const TYPELIB *typelib, double *selectivity) const
An internal function for getting the selecitvity estimation.
Definition: histogram.cc:1762
double apply_operator(const enum_operator op, const T &value) const
An internal function for applying the correct function for the given operator.
Definition: histogram.cc:1746
const CHARSET_INFO * get_character_set() const
Definition: histogram.h:549
const LEX_CSTRING get_table_name() const
Definition: histogram.h:531
double get_null_values_fraction() const
Definition: histogram.cc:432
MEM_ROOT * get_mem_root() const
Definition: histogram.h:521
enum_histogram_type get_histogram_type() const
Definition: histogram.h:541
virtual bool json_to_histogram(const Json_object &json_object, Error_context *context)=0
Populate the histogram with data from the provided JSON object.
Definition: histogram.cc:644
LEX_CSTRING m_database_name
Name of the database this histogram represents.
Definition: histogram.h:439
bool store_histogram(THD *thd) const
Store this histogram to persistent storage (data dictionary).
Definition: histogram.cc:1477
static constexpr const char * histogram_type_str()
String representation of the JSON field "histogram-type".
Definition: histogram.h:318
bool histogram_data_type_to_json(Json_object *json_object) const
Write the data type of this histogram into a JSON object.
Definition: histogram.cc:741
static constexpr const char * singleton_str()
String representation of the histogram type SINGLETON.
Definition: histogram.h:327
static constexpr const char * sampling_rate_str()
Definition: histogram.h:353
double get_greater_than_selectivity_dispatcher(const T &value) const
Definition: histogram.cc:1679
const LEX_CSTRING get_column_name() const
Definition: histogram.h:536
enum_histogram_type
All supported histogram types in MySQL.
Definition: histogram.h:315
static constexpr const char * null_values_str()
String representation of the JSON field "null-values".
Definition: histogram.h:351
static MEM_ROOT mem_root
Definition: client_plugin.cc:113
This file includes constants used by all storage engines.
static PFS_engine_table_share_proxy table
Definition: pfs.cc:60
The version of the current data dictionary table definitions.
Definition: dictionary_client.h:42
Definition: column_statistics.h:33
std::set< std::string, std::less< std::string >, Histogram_key_allocator< std::string > > columns_set
Definition: histogram.h:139
bool drop_all_histograms(THD *thd, Table_ref &table, const dd::Table &table_definition, results_map &results)
Drop histograms for all columns in a given table.
Definition: histogram.cc:1431
std::map< std::string, Message, std::less< std::string >, Histogram_key_allocator< std::pair< const std::string, Message > > > results_map
Definition: histogram.h:145
Message
Definition: histogram.h:83
@ JSON_CUMULATIVE_FREQUENCY_NOT_ASCENDING
@ JSON_NUM_BUCKETS_MORE_THAN_SPECIFIED
bool drop_histograms(THD *thd, Table_ref &table, const columns_set &columns, results_map &results)
Drop histograms for a set of columns in a given table.
Definition: histogram.cc:1441
enum_operator
The different operators we can ask histogram statistics for selectivity estimations.
Definition: histogram.h:151
bool rename_histograms(THD *thd, const char *old_schema_name, const char *old_table_name, const char *new_schema_name, const char *new_table_name, results_map &results)
Rename histograms for all columns in a given table.
Definition: histogram.cc:1598
Histogram * build_histogram(MEM_ROOT *mem_root, const Value_map< T > &value_map, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name)
Create a histogram from a value map.
Definition: histogram.cc:442
bool update_histogram(THD *thd, Table_ref *table, const columns_set &columns, int num_buckets, LEX_STRING data, results_map &results)
Create or update histograms for a set of columns of a given table.
Definition: histogram.cc:1238
bool find_histogram(THD *thd, const std::string &schema_name, const std::string &table_name, const std::string &column_name, const Histogram **histogram)
Definition: histogram.cc:1637
static const double INVALID_NULL_VALUES_FRACTION
The default (and invalid) value for "m_null_values_fraction".
Definition: histogram.h:81
std::map< T, ha_rows, Histogram_comparator, value_map_allocator< T > > value_map_type
Definition: histogram.h:136
Value_map_type
Datatypes that a Value_map and histogram can hold (including the invalid type).
Definition: value_map_type.h:32
bool empty(const Histogram &histogram)
Return true if 'histogram' was built on an empty table.
Definition: histogram.h:672
const char * table_name
Definition: rules_table_service.cc:55
const char * db_name
Definition: rules_table_service.cc:54
required string type
Definition: replication_group_member_actions.proto:33
case opt name
Definition: sslopt-case.h:32
Definition: m_ctype.h:422
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:82
Definition: mysql_lex_string.h:39
Definition: mysql_lex_string.h:34
Definition: typelib.h:34
Definition: histogram.h:124
void * operator()(size_t s) const
Definition: histogram.cc:117