MySQL 8.3.0
Source Code Documentation
histograms Namespace Reference

Namespaces

namespace  equi_height
 

Classes

class  Equi_height
 
class  Error_context
 Error context to validate given JSON object which represents a histogram. More...
 
class  Histogram
 Histogram base class. More...
 
struct  Histogram_comparator
 Histogram comparator. More...
 
class  Histogram_error_handler
 RAII class to trap lower-level errors. More...
 
struct  Histogram_psi_key_alloc
 
class  Singleton
 
struct  SingletonBucket
 
class  Value_map
 Value_map class. More...
 
class  Value_map_base
 The abstract base class for all Value_map types. More...
 

Typedefs

using value_map_collection = std::map< uint16, std::unique_ptr< histograms::Value_map_base >, std::less< uint16 >, Histogram_key_allocator< std::pair< const uint16, std::unique_ptr< histograms::Value_map_base > > > >
 
template<class T >
using Histogram_key_allocator = Stateless_allocator< T, Histogram_psi_key_alloc >
 
template<class T >
using value_map_allocator = Mem_root_allocator< std::pair< const T, ha_rows > >
 
template<typename T >
using value_map_type = std::map< T, ha_rows, Histogram_comparator, value_map_allocator< T > >
 
using columns_set = std::set< std::string, std::less< std::string >, Histogram_key_allocator< std::string > >
 
using results_map = std::map< std::string, Message, std::less< std::string >, Histogram_key_allocator< std::pair< const std::string, Message > > >
 

Enumerations

enum class  Message {
  FIELD_NOT_FOUND , UNSUPPORTED_DATA_TYPE , TEMPORARY_TABLE , ENCRYPTED_TABLE ,
  VIEW , HISTOGRAM_CREATED , MULTIPLE_TABLES_SPECIFIED , COVERED_BY_SINGLE_PART_UNIQUE_INDEX ,
  NO_HISTOGRAM_FOUND , HISTOGRAM_DELETED , SERVER_READ_ONLY , MULTIPLE_COLUMNS_SPECIFIED ,
  JSON_FORMAT_ERROR , JSON_NOT_AN_OBJECT , JSON_MISSING_ATTRIBUTE , JSON_WRONG_ATTRIBUTE_TYPE ,
  JSON_WRONG_BUCKET_TYPE_2 , JSON_WRONG_BUCKET_TYPE_4 , JSON_WRONG_DATA_TYPE , JSON_UNSUPPORTED_DATA_TYPE ,
  JSON_UNSUPPORTED_HISTOGRAM_TYPE , JSON_UNSUPPORTED_CHARSET , JSON_INVALID_SAMPLING_RATE , JSON_INVALID_NUM_BUCKETS_SPECIFIED ,
  JSON_INVALID_FREQUENCY , JSON_INVALID_NUM_DISTINCT , JSON_VALUE_FORMAT_ERROR , JSON_VALUE_OUT_OF_RANGE ,
  JSON_VALUE_NOT_ASCENDING_1 , JSON_VALUE_NOT_ASCENDING_2 , JSON_VALUE_DESCENDING_IN_BUCKET , JSON_CUMULATIVE_FREQUENCY_NOT_ASCENDING ,
  JSON_INVALID_NULL_VALUES_FRACTION , JSON_INVALID_TOTAL_FREQUENCY , JSON_NUM_BUCKETS_MORE_THAN_SPECIFIED , JSON_IMPOSSIBLE_EMPTY_EQUI_HEIGHT
}
 
enum class  enum_operator {
  EQUALS_TO , GREATER_THAN , LESS_THAN , IS_NULL ,
  IS_NOT_NULL , LESS_THAN_OR_EQUAL , GREATER_THAN_OR_EQUAL , NOT_EQUALS_TO ,
  BETWEEN , NOT_BETWEEN , IN_LIST , NOT_IN_LIST
}
 The different operators we can ask histogram statistics for selectivity estimations. More...
 
enum class  Value_map_type {
  INVALID , STRING , INT , UINT ,
  DOUBLE , DECIMAL , DATE , TIME ,
  DATETIME , ENUM , SET
}
 Datatypes that a Value_map and histogram can hold (including the invalid type). More...
 

Functions

template<class T >
static bool FitsIntoBuckets (const Value_map< T > &value_map, ha_rows max_bucket_values, size_t max_buckets)
 
template<class T >
static ha_rows FindBucketMaxValues (const Value_map< T > &value_map, size_t max_buckets)
 
static ha_rows EstimateDistinctValues (double sampling_rate, ha_rows bucket_distinct_values, ha_rows bucket_unary_values)
 
static Value_map_type field_type_to_value_map_type (const enum_field_types field_type, const bool is_unsigned)
 Convert from enum_field_types to Value_map_type. More...
 
static Value_map_type field_type_to_value_map_type (const Field *field)
 Get the Value_map_type from a Field object. More...
 
static type_conversion_status check_value_aux (Field *field, const double *nr)
 Helper function for check_value(). More...
 
static type_conversion_status check_value_aux (Field *field, const String *str)
 
static type_conversion_status check_value_aux (Field *field, const longlong *nr)
 
static type_conversion_status check_value_aux (Field *field, const ulonglong *nr)
 
static type_conversion_status check_value_aux (Field *field, MYSQL_TIME *ltime)
 
static type_conversion_status check_value_aux (Field *field, const my_decimal *mdec)
 
static bool lock_for_write (THD *thd, const MDL_key &mdl_key)
 Lock a column statistic MDL key for writing (exclusive lock). More...
 
template<class T >
Histogram * build_histogram (MEM_ROOT *mem_root, const Value_map< T > &value_map, size_t num_buckets, const std::string &db_name, const std::string &tbl_name, const std::string &col_name)
 Create a histogram from a value map. More...
 
static bool covered_by_single_part_index (const THD *thd, const Field *field)
 Check if a field is covered by a single-part unique index (primary key or unique index). More...
 
static bool prepare_value_maps (std::vector< Field *, Histogram_key_allocator< Field * > > &fields, value_map_collection &value_maps, size_t *row_size_bytes)
 Prepare one Value_map for each field we are creating histogram statistics for. More...
 
static bool fill_value_maps (const std::vector< Field *, Histogram_key_allocator< Field * > > &fields, double sample_percentage, const TABLE *table, value_map_collection &value_maps)
 Read data from a table into the provided Value_maps. More...
 
bool update_histogram (THD *thd, Table_ref *table, const columns_set &columns, int num_buckets, LEX_STRING data, results_map &results)
 Create or update histograms for a set of columns of a given table. More...
 
bool drop_all_histograms (THD *thd, Table_ref &table, const dd::Table &original_table_def, results_map &results)
 Drop histograms for all columns in a given table. More...
 
bool drop_histograms (THD *thd, Table_ref &table, const columns_set &columns, results_map &results)
 Drop histograms for a set of columns in a given table. More...
 
static bool rename_histogram (THD *thd, const char *old_schema_name, const char *old_table_name, const char *new_schema_name, const char *new_table_name, const char *column_name, results_map &results)
 Rename a single histogram from an old schema/table name to a new schema/table name. More...
 
bool rename_histograms (THD *thd, const char *old_schema_name, const char *old_table_name, const char *new_schema_name, const char *new_table_name, results_map &results)
 Rename histograms for all columns in a given table. More...
 
bool find_histogram (THD *thd, const std::string &schema_name, const std::string &table_name, const std::string &column_name, const Histogram **histogram)
 
static bool get_temporal (Item *item, Value_map_type preferred_type, MYSQL_TIME *time_value)
 
template Histogram * build_histogram (MEM_ROOT *, const Value_map< double > &, size_t, const std::string &, const std::string &, const std::string &)
 
template Histogram * build_histogram (MEM_ROOT *, const Value_map< String > &, size_t, const std::string &, const std::string &, const std::string &)
 
template Histogram * build_histogram (MEM_ROOT *, const Value_map< ulonglong > &, size_t, const std::string &, const std::string &, const std::string &)
 
template Histogram * build_histogram (MEM_ROOT *, const Value_map< longlong > &, size_t, const std::string &, const std::string &, const std::string &)
 
template Histogram * build_histogram (MEM_ROOT *, const Value_map< MYSQL_TIME > &, size_t, const std::string &, const std::string &, const std::string &)
 
template Histogram * build_histogram (MEM_ROOT *, const Value_map< my_decimal > &, size_t, const std::string &, const std::string &, const std::string &)
 
bool empty (const Histogram &histogram)
 Return true if 'histogram' was built on an empty table. More...
 
template<typename T >
T DeepCopy (const T &src, MEM_ROOT *mem_root, bool *error)
 Returns a deep copy of the input argument. More...
 
template<>
String DeepCopy (const String &src, MEM_ROOT *mem_root, bool *error)
 

Variables

static constexpr int MAX_NUMBER_OF_HISTOGRAM_BUCKETS = 1024
 
static std::map< const Value_map_type, const std::string > value_map_type_to_str
 
static const double INVALID_NULL_VALUES_FRACTION = -1.0
 The default (and invalid) value for "m_null_values_fraction". More...
 
static const size_t HISTOGRAM_MAX_COMPARE_LENGTH = 42
 The maximum number of characters to evaluate when building histograms. More...
 

Typedef Documentation

◆ columns_set

using histograms::columns_set = typedef std::set<std::string, std::less<std::string>, Histogram_key_allocator<std::string> >

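◆ Histogram_key_allocator

template<class T >
using histograms::Histogram_key_allocator = typedef Stateless_allocator<T, Histogram_psi_key_alloc>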

◆ results_map

using histograms::results_map = typedef std::map<std::string, Message, std::less<std::string>, Histogram_key_allocator<std::pair<const std::string, Message> >>

◆ value_map_allocator

template<class T >
using histograms::value_map_allocator = typedef Mem_root_allocator<std::pair<const T, ha_rows> >

◆ value_map_collection

using histograms::value_map_collection = typedef std::map< uint16, std::unique_ptr<histograms::Value_map_base>, std::less<uint16>, Histogram_key_allocator< std::pair<const uint16, std::unique_ptr<histograms::Value_map_base> >> >

◆ value_map_type

template<typename T >
using histograms::value_map_type = typedef std::map<T, ha_rows, Histogram_comparator, value_map_allocator<T> >

Enumeration Type Documentation

◆ enum_operator

enum class histograms::enum_operator
strong

The different operators we can ask histogram statistics for selectivity estimations.

Enumerator
EQUALS_TO 
GREATER_THAN 
LESS_THAN 
IS_NULL 
IS_NOT_NULL 
LESS_THAN_OR_EQUAL 
GREATER_THAN_OR_EQUAL 
NOT_EQUALS_TO 
BETWEEN 
NOT_BETWEEN 
IN_LIST 
NOT_IN_LIST 

◆ Message

enum class histograms::Message
strong
Enumerator
FIELD_NOT_FOUND 
UNSUPPORTED_DATA_TYPE 
TEMPORARY_TABLE 
ENCRYPTED_TABLE 
VIEW 
HISTOGRAM_CREATED 
MULTIPLE_TABLES_SPECIFIED 
COVERED_BY_SINGLE_PART_UNIQUE_INDEX 
NO_HISTOGRAM_FOUND 
HISTOGRAM_DELETED 
SERVER_READ_ONLY 
MULTIPLE_COLUMNS_SPECIFIED 
JSON_FORMAT_ERROR 
JSON_NOT_AN_OBJECT 
JSON_MISSING_ATTRIBUTE 
JSON_WRONG_ATTRIBUTE_TYPE 
JSON_WRONG_BUCKET_TYPE_2 
JSON_WRONG_BUCKET_TYPE_4 
JSON_WRONG_DATA_TYPE 
JSON_UNSUPPORTED_DATA_TYPE 
JSON_UNSUPPORTED_HISTOGRAM_TYPE 
JSON_UNSUPPORTED_CHARSET 
JSON_INVALID_SAMPLING_RATE 
JSON_INVALID_NUM_BUCKETS_SPECIFIED 
JSON_INVALID_FREQUENCY 
JSON_INVALID_NUM_DISTINCT 
JSON_VALUE_FORMAT_ERROR 
JSON_VALUE_OUT_OF_RANGE 
JSON_VALUE_NOT_ASCENDING_1 
JSON_VALUE_NOT_ASCENDING_2 
JSON_VALUE_DESCENDING_IN_BUCKET 
JSON_CUMULATIVE_FREQUENCY_NOT_ASCENDING 
JSON_INVALID_NULL_VALUES_FRACTION 
JSON_INVALID_TOTAL_FREQUENCY 
JSON_NUM_BUCKETS_MORE_THAN_SPECIFIED 
JSON_IMPOSSIBLE_EMPTY_EQUI_HEIGHT 

◆ Value_map_type

enum class histograms::Value_map_type
strong

Datatypes that a Value_map and histogram can hold (including the invalid type).

Enumerator
INVALID 
STRING 
INT 
UINT 
DOUBLE 
DECIMAL 
DATE 
TIME 
DATETIME 
ENUM 
SET 

Function Documentation

◆ build_histogram() [1/7]

template Histogram * histograms::build_histogram ( MEM_ROOT * ,
const Value_map< double > &  ,
size_t  ,
const std::string &  ,
const std::string &  ,
const std::string &   
)

◆ build_histogram() [2/7]

template Histogram * histograms::build_histogram ( MEM_ROOT * ,
const Value_map< longlong > &  ,
size_t  ,
const std::string &  ,
const std::string &  ,
const std::string &   
)

◆ build_histogram() [3/7]

template Histogram * histograms::build_histogram ( MEM_ROOT * ,
const Value_map< my_decimal > &  ,
size_t  ,
const std::string &  ,
const std::string &  ,
const std::string &   
)

◆ build_histogram() [4/7]

template Histogram * histograms::build_histogram ( MEM_ROOT * ,
const Value_map< MYSQL_TIME > &  ,
size_t  ,
const std::string &  ,
const std::string &  ,
const std::string &   
)

◆ build_histogram() [5/7]

template Histogram * histograms::build_histogram ( MEM_ROOT * ,
const Value_map< String > &  ,
size_t  ,
const std::string &  ,
const std::string &  ,
const std::string &   
)

◆ build_histogram() [6/7]

template Histogram * histograms::build_histogram ( MEM_ROOT * ,
const Value_map< ulonglong > &  ,
size_t  ,
const std::string &  ,
const std::string &  ,
const std::string &   
)

◆ build_histogram() [7/7]

template<class T >
Histogram * histograms::build_histogram ( MEM_ROOT * mem_root,
const Value_map< T > &  value_map,
size_t  num_buckets,
const std::string &  db_name,
const std::string &  tbl_name,
const std::string &  col_name 
)

Create a histogram from a value map.

This function will build a histogram from a value map. The histogram type depends on both the size of the input data, as well as the number of buckets specified. If the number of distinct values is less than or equal to the number of buckets, a Singleton histogram will be created. Otherwise, an equi-height histogram will be created.

The histogram will be allocated on the supplied mem_root, and it is the caller's responsibility to properly clean up when the histogram isn't needed anymore.

Parameters
mem_root: the MEM_ROOT where the histogram contents will be allocated
value_map: a value map containing [value, frequency]
num_buckets: the maximum number of buckets to create
db_name: name of the database this histogram represents
tbl_name: name of the table this histogram represents
col_name: name of the column this histogram represents
Returns
a histogram, using at most "num_buckets" buckets. The histogram type depends on the size of the input data, and the number of buckets

◆ check_value_aux() [1/6]

static type_conversion_status histograms::check_value_aux ( Field * field,
const double *  nr 
)
static

Helper function for check_value().

It uses Field::store() on the actual Field that the histogram belongs to in order to test if the value is in the field definition domain.

◆ check_value_aux() [2/6]

static type_conversion_status histograms::check_value_aux ( Field * field,
const longlong * nr
)
static

◆ check_value_aux() [3/6]

static type_conversion_status histograms::check_value_aux ( Field * field,
const my_decimal * mdec
)
static

◆ check_value_aux() [4/6]

static type_conversion_status histograms::check_value_aux ( Field * field,
const String * str
)
static

◆ check_value_aux() [5/6]

static type_conversion_status histograms::check_value_aux ( Field * field,
const ulonglong * nr
)
static

◆ check_value_aux() [6/6]

static type_conversion_status histograms::check_value_aux ( Field * field,
MYSQL_TIME * ltime
)
static

◆ covered_by_single_part_index()

static bool histograms::covered_by_single_part_index ( const THD * thd,
const Field * field
)
static

Check if a field is covered by a single-part unique index (primary key or unique index).

Indexes that are marked as invisible are ignored.

Parameters
thd: The current session.
field: The field to check.
Returns
true if the field is covered by a single-part unique index. False otherwise.

◆ DeepCopy() [1/2]

template<>
String histograms::DeepCopy ( const String & src,
MEM_ROOT * mem_root,
bool *  error 
)
inline

◆ DeepCopy() [2/2]

template<typename T >
T histograms::DeepCopy ( const T &  src,
MEM_ROOT * mem_root,
bool *  error 
)

Returns a deep copy of the input argument.

In case T has heap-allocated data it is copied onto the supplied mem_root.

Note
This function is only intended to be used to copy the values in histogram buckets and does not provide general support for deep copying arbitrary types.
Parameters
src: The value to be copied.
mem_root: The MEM_ROOT to copy heap-allocated data onto.
[out] error: Set to true if an error occurs.
Returns
A deep copy of the input argument.

◆ drop_all_histograms()

bool histograms::drop_all_histograms ( THD * thd,
Table_ref & table,
const dd::Table & original_table_def,
results_map & results
)

Drop histograms for all columns in a given table.

Parameters
thd: Thread handler.
table: The table where we should look for the columns.
original_table_def: Original table definition.
results: A map where the result of each operation is stored.
Note
Assumes that caller owns exclusive metadata lock on the table, so there is no need to lock individual statistics.
Returns
false on success, true on error.

◆ drop_histograms()

bool histograms::drop_histograms ( THD * thd,
Table_ref & table,
const columns_set & columns,
results_map & results
)

Drop histograms for a set of columns in a given table.

This function will try to drop the histogram statistics for all specified columns. If dropping the histogram for one of the columns fails, it will continue with the next column.

Parameters
thd: Thread handler.
table: The table where we should look for the columns.
columns: Columns specified by the user.
results: A map where the result of each operation is stored.
Note
Assumes that the caller has the appropriate metadata locks on both the table and column statistics. That can either be an exclusive metadata lock on the table itself, or a shared metadata lock on the table combined with exclusive locks on individual column statistics.
Returns
false on success, true on error.

◆ empty()

bool histograms::empty ( const Histogram & histogram )
inline

Return true if 'histogram' was built on an empty table.

◆ EstimateDistinctValues()

static ha_rows histograms::EstimateDistinctValues ( double  sampling_rate,
ha_rows  bucket_distinct_values,
ha_rows  bucket_unary_values 
)
static

◆ field_type_to_value_map_type() [1/2]

static Value_map_type histograms::field_type_to_value_map_type ( const enum_field_types  field_type,
const bool  is_unsigned 
)
static

Convert from enum_field_types to Value_map_type.

Parameters
field_type: the field type
is_unsigned: whether the field type is unsigned or not. This is only considered if the field type is LONGLONG
Returns
A Value_map_type. May be INVALID if the Value_map does not support the field type.

◆ field_type_to_value_map_type() [2/2]

static Value_map_type histograms::field_type_to_value_map_type ( const Field * field )
static

Get the Value_map_type from a Field object.

This effectively looks at the real_type() of a Field, and converts this to a Value_map_type

Parameters
field: The field to convert from
Returns
A Value_map_type. May be INVALID if the Value_map does not support the field type.

◆ fill_value_maps()

static bool histograms::fill_value_maps ( const std::vector< Field *, Histogram_key_allocator< Field * > > &  fields,
double  sample_percentage,
const TABLE * table,
value_map_collection & value_maps
)
static

Read data from a table into the provided Value_maps.

We will read data using sampling with the provided sampling percentage.

Parameters
fields: A vector with the fields we are reading data from.
sample_percentage: The sampling percentage we will use for sampling. Must be between 0.0 and 100.0.
table: The table we are reading the data from.
value_maps: The Value_maps we are reading data into.
Returns
true on error, false otherwise.

◆ find_histogram()

bool histograms::find_histogram ( THD * thd,
const std::string &  schema_name,
const std::string &  table_name,
const std::string &  column_name,
const Histogram **  histogram 
)

◆ FindBucketMaxValues()

template<class T >
static ha_rows histograms::FindBucketMaxValues ( const Value_map< T > &  value_map,
size_t  max_buckets 
)
static

◆ FitsIntoBuckets()

template<class T >
static bool histograms::FitsIntoBuckets ( const Value_map< T > &  value_map,
ha_rows  max_bucket_values,
size_t  max_buckets 
)
static

◆ get_temporal()

static bool histograms::get_temporal ( Item * item,
Value_map_type  preferred_type,
MYSQL_TIME * time_value
)
static

◆ lock_for_write()

static bool histograms::lock_for_write ( THD * thd,
const MDL_key & mdl_key
)
static

Lock a column statistic MDL key for writing (exclusive lock).

Parameters
thd: thread handle
mdl_key: the MDL key to lock
Returns
true on error, false on success

◆ prepare_value_maps()

static bool histograms::prepare_value_maps ( std::vector< Field *, Histogram_key_allocator< Field * > > &  fields,
value_map_collection & value_maps,
size_t *  row_size_bytes 
)
static

Prepare one Value_map for each field we are creating histogram statistics for.

We will also estimate how many bytes one row will consume. For example, if we are creating histogram statistics for two INTEGER columns, we estimate that one row will consume (sizeof(longlong) * 2) bytes (16 bytes).

Parameters
fields: A vector with all the fields we are creating histogram statistics for.
[out] value_maps: A map where the Value_maps will be initialized.
[out] row_size_bytes: An estimation of how many bytes one row will consume.
Returns
true on error, false otherwise.

◆ rename_histogram()

static bool histograms::rename_histogram ( THD * thd,
const char *  old_schema_name,
const char *  old_table_name,
const char *  new_schema_name,
const char *  new_table_name,
const char *  column_name,
results_map & results
)
static

Rename a single histogram from an old schema/table name to a new schema/table name.

It is used, for instance, by RENAME TABLE, where the contents of the histograms don't change.

Parameters
thd: Thread handler.
old_schema_name: The old schema name.
old_table_name: The old table name.
new_schema_name: The new schema name.
new_table_name: The new table name.
column_name: The column name.
results: A map where the result of the operation is stored.
Returns
false on success, true on error.

◆ rename_histograms()

bool histograms::rename_histograms ( THD * thd,
const char *  old_schema_name,
const char *  old_table_name,
const char *  new_schema_name,
const char *  new_table_name,
results_map & results
)

Rename histograms for all columns in a given table.

Parameters
thd: Thread handler.
old_schema_name: The old schema name
old_table_name: The old table name
new_schema_name: The new schema name
new_table_name: The new table name
results: A map where the result of each operation is stored.
Returns
false on success, true on error.

◆ update_histogram()

bool histograms::update_histogram ( THD * thd,
Table_ref * table,
const columns_set & columns,
int  num_buckets,
LEX_STRING  data,
results_map & results
)

Create or update histograms for a set of columns of a given table.

This function will try to create histogram statistics for all the columns specified. If creating the histogram for one of the columns fails, it will continue with the next column.

Parameters
thd: Thread handler.
table: The table where we should look for the columns/data.
columns: Columns specified by the user.
num_buckets: The maximum number of buckets to create in each histogram.
data: The histogram json literal for update
results: A map where the result of each operation is stored.
Returns
false on success, true on error.

Variable Documentation

◆ HISTOGRAM_MAX_COMPARE_LENGTH

const size_t histograms::HISTOGRAM_MAX_COMPARE_LENGTH = 42
static

The maximum number of characters to evaluate when building histograms.

For binary/blob values, this is the number of bytes to consider.

◆ INVALID_NULL_VALUES_FRACTION

const double histograms::INVALID_NULL_VALUES_FRACTION = -1.0
static

The default (and invalid) value for "m_null_values_fraction".

◆ MAX_NUMBER_OF_HISTOGRAM_BUCKETS

constexpr int histograms::MAX_NUMBER_OF_HISTOGRAM_BUCKETS = 1024
static constexpr

◆ value_map_type_to_str

std::map<const Value_map_type, const std::string> histograms::value_map_type_to_str
static
Initial value:
=
{{Value_map_type::DATETIME, "datetime"}, {Value_map_type::DATE, "date"},
{Value_map_type::TIME, "time"}, {Value_map_type::INT, "int"},
{Value_map_type::UINT, "uint"}, {Value_map_type::DOUBLE, "double"},
{Value_map_type::DECIMAL, "decimal"}, {Value_map_type::STRING, "string"},
{Value_map_type::ENUM, "enum"}, {Value_map_type::SET, "set"}}
@ DATE
Definition: mysqlx_resultset.proto:626