mysql-server/latest/cell__calculator_8h_source.html

/* Copyright (c) 2018, 2025, Oracle and/or its affiliates.


This program is free software; you can redistribute it and/or modify it under

the terms of the GNU General Public License, version 2.0, as published by the

Free Software Foundation.


This program is designed to work with certain software (including

but not limited to OpenSSL) that is licensed under separate terms,

as designated in a particular file or component or in included license

documentation.  The authors of MySQL hereby grant you an additional

permission to link the program and your derivative works with the

separately licensed software that they have either included with

the program or referenced in the documentation.


This program is distributed in the hope that it will be useful, but WITHOUT

ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,

for more details.


You should have received a copy of the GNU General Public License along with

this program; if not, write to the Free Software Foundation, Inc.,

51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */


/** @file storage/temptable/include/temptable/cell_calculator.h

TempTable Cell_calculator declaration. */


#ifndef TEMPTABLE_CELL_CALCULATOR_H

#define TEMPTABLE_CELL_CALCULATOR_H


#include <algorithm>

#include <cstdint>


#include "my_dbug.h"

#include "my_murmur3.h"

#include "mysql/strings/m_ctype.h"

#include "sql/field.h"

#include "sql/key.h"

#include "storage/temptable/include/temptable/cell.h"


namespace temptable {


/** Utility to perform calculations for a cell. It uses cell's contents and
a stored context that describes how to interpret the data.

The context consists of the MySQL field, the charset chosen for it (if any),
the calculation mode and a few flags derived from the field's key type. All
of it is fixed at construction time; hash() and compare() only read it. */
class Cell_calculator {
 public:
  /** Default constructor used for std::array initialization in Index.
   * Every data member has an in-class initializer, so a default-constructed
   * calculator is in a well-defined state (no field, no charset, BINARY mode)
   * instead of holding indeterminate values. */
  Cell_calculator() = default;

  /** Constructor to be used when creating calculators for indexed columns. */
  explicit Cell_calculator(
      /** [in] Key part (indexed column) for which calculator is created. */
      const KEY_PART_INFO &mysql_key_part);

  /** Constructor to be used when creating calculators for columns when
  comparing table rows. */
  explicit Cell_calculator(
      /** [in] Field (column) for which calculator is created. */
      const Field *mysql_field);

  /** Calculate hash value for a cell.
   * @return a hash number */
  size_t hash(
      /** [in] Cell for which hash is to be calculated. */
      const Cell &cell) const;

  /** Compare two cells. A NULL cell compares equal to another NULL cell and
   * less than any non-NULL cell.
   * @retval <0 if lhs < rhs
   * @retval  0 if lhs == rhs
   * @retval >0 if lhs > rhs */
  int compare(
      /** [in] First cell to compare. */
      const Cell &lhs,
      /** [in] Second cell to compare. */
      const Cell &rhs) const;

 private:
  /** How the cell's bytes are interpreted by hash() and compare(). */
  enum class Mode : uint8_t {
    /** No charset: the raw bytes are hashed and Field::key_cmp() compares. */
    BINARY,
    /** The charset's collation is applied to the whole cell data. */
    CHARSET,
    /** The charset's collation is applied to at most m_char_length
     * characters of the cell data. */
    CHARSET_AND_CHAR_LENGTH,
  };

  /** Pick the charset to use for a field, or nullptr if the field is to be
   * treated as pure binary data. */
  static const CHARSET_INFO *field_charset(const Field &field);

  /** Field for which this calculator was created. */
  const Field *m_mysql_field = nullptr;

  /** Charset used by calculator. NULL for binary mode. */
  const CHARSET_INFO *m_cs = nullptr;

  /** Calculation mode. */
  Mode m_mode = Mode::BINARY;

  /** True if the key is of type double or float */
  bool m_is_floating_point = false;

  /** True if the cell is right-padded with spaces (CHAR column). */
  bool m_is_space_padded = false;

  /** Length in number of characters.
   * Only used in CHARSET_AND_CHAR_LENGTH mode. */
  uint32_t m_char_length = 0;

  /** This value is to be used for hashing 0 value for approximate types such as
   * float or double. Specifically, for such types we don't want to have
   * separate hash entries for 0.0 and -0.0. */
  static const size_t s_zero_hash;

  /** Convenience function to get the hash value of 0.0. */
  static size_t zero_hash();
};


/* Implementation of inlined methods. */


inline Cell_calculator::Cell_calculator(const KEY_PART_INFO &mysql_key_part)
    : m_mysql_field(mysql_key_part.field),
      m_cs(field_charset(*m_mysql_field)),
      m_is_floating_point(m_mysql_field->key_type() == HA_KEYTYPE_FLOAT ||
                          m_mysql_field->key_type() == HA_KEYTYPE_DOUBLE),
      m_is_space_padded(m_mysql_field->key_type() == HA_KEYTYPE_TEXT),
      m_char_length(0) {
  /* The mode selection follows hp_hashnr() from storage/heap/hp_hash.c. */

  /* Without a charset the cell is handled as plain bytes. */
  if (m_cs == nullptr) {
    m_mode = Mode::BINARY;
    return;
  }

  /* A character limit (my_charpos) is only needed for a multi-byte charset
   * combined with a key part flagged HA_PART_KEY_SEG. */
  bool limit_by_chars = (m_cs->mbmaxlen > 1) &&
                        ((mysql_key_part.key_part_flag & HA_PART_KEY_SEG) != 0);

  /* Debug hook that forces the character-limited mode. */
  DBUG_EXECUTE_IF("temptable_use_char_length", limit_by_chars = true;);

  if (!limit_by_chars) {
    m_mode = Mode::CHARSET;
    return;
  }

  /* Key part length is in bytes; convert it to a number of characters. */
  m_char_length = mysql_key_part.length / m_cs->mbmaxlen;
  m_mode = Mode::CHARSET_AND_CHAR_LENGTH;
}


inline Cell_calculator::Cell_calculator(const Field *mysql_field)
    : m_mysql_field(mysql_field),
      m_cs(field_charset(*m_mysql_field)),
      m_is_floating_point(m_mysql_field->key_type() == HA_KEYTYPE_FLOAT ||
                          m_mysql_field->key_type() == HA_KEYTYPE_DOUBLE),
      m_is_space_padded(m_mysql_field->key_type() == HA_KEYTYPE_TEXT),
      m_char_length(0) {
  /* The mode selection follows hp_hashnr() from storage/heap/hp_hash.c.
   * Whole columns are never partial keys, so CHARSET_AND_CHAR_LENGTH cannot
   * occur here: the mode depends only on whether a charset was chosen. */
  m_mode = (m_cs != nullptr) ? Mode::CHARSET : Mode::BINARY;
}


inline const CHARSET_INFO *Cell_calculator::field_charset(const Field &field) {
  /* Only text and varbinary key types are compared through a
   * charset+collation; every other key type relies on pure binary data,
   * which is signalled by returning nullptr. */
  const auto type = field.key_type();
  const bool uses_charset =
      type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
      type == HA_KEYTYPE_VARTEXT2 || type == HA_KEYTYPE_VARBINARY1 ||
      type == HA_KEYTYPE_VARBINARY2;

  if (!uses_charset) {
    return nullptr;
  }

  /* ENUM and SET fields get the binary charset. */
  if (field.is_flag_set(ENUM_FLAG) || field.is_flag_set(SET_FLAG)) {
    return &my_charset_bin;
  }

  return field.charset_for_protocol();
}


inline size_t Cell_calculator::hash(const Cell &cell) const {

  if (cell.is_null()) {

    return 1;

  }


  auto data_length = cell.data_length();

  auto data = cell.data();


  /* For approximate types, 0.0 and -0.0 may have different bit patterns. Treat

   * all such patterns as belonging to a single value. */

  if (m_is_floating_point) {

    assert(data_length == 4 || data_length == 8);

    const double val = data_length == 4 ? float4get(data) : float8get(data);

    if (val == 0.0) return s_zero_hash;

    return murmur3_32(data, data_length, 0);

  }


  /*

   * If the collation of field to calculate hash is with PAD_SPACE attribute,

   * empty string '' and space ' ' will be calculated as different hash values,

   * because we handle empty string '' directly (return 0), and calculate hash

   * with cs for space ' '. But actually, for collations with PAD_SPACE

   * attribute empty string '' should be equal with space ' '. Do not return

   * hash value 0 if data_length == 0. */


  size_t length = 0;


  /*

  switch (m_mode) {

    case Mode::CHARSET:

      length = ...

      break;

    case Mode::CHARSET_AND_CHAR_LENGTH:

      length = ...

      break;

    case Mode::BINARY:

      return ...

  }

  code <-- this is executed when

  indexed_column.cell_hash_function() == Mode::BINARY

  and compiled with "Studio 12.5 Sun C++ 5.14 SunOS_sparc 2016/05/31" !!!

  So we use if-else instead of switch below. */


  if (m_mode == Mode::BINARY) {

    return murmur3_32(data, data_length, 0);

  } else if (m_mode == Mode::CHARSET) {

    length = data_length;

  } else if (m_mode == Mode::CHARSET_AND_CHAR_LENGTH) {

    length =

        std::min(static_cast<size_t>(data_length),

                 my_charpos(m_cs, data, data + data_length, m_char_length));

  } else {

    my_abort();

  }


  /* If the field is space padded but collation do not want to use

   * the padding it is required to strip the spaces from the end. */

  if (m_is_space_padded && (m_cs->pad_attribute == NO_PAD)) {

    length = m_cs->cset->lengthsp(m_cs, reinterpret_cast<const char *>(data),

                                  length);

  }


  uint64 h1 = 1;

  uint64 h2 = 4;

  m_cs->coll->hash_sort(m_cs, data, length, &h1, &h2);

  return h1;

}


inline int Cell_calculator::compare(const Cell &lhs, const Cell &rhs) const {

  if (lhs.is_null()) {

    if (rhs.is_null()) {

      /* Both are NULL. */

      return 0;

    } else {

      /* NULL < whatever (not NULL). */

      return -1;

    }

  } else {

    if (rhs.is_null()) {

      /* whatever (not NULL) > NULL. */

      return 1;

    }

  }


  /* Both cells are not NULL. */

  auto lhs_data_length = lhs.data_length();

  auto rhs_data_length = rhs.data_length();


  /* If both cells' data is identical, then no need to use the expensive

   * comparisons below because we know that they will report equality. */

  if ((lhs_data_length == rhs_data_length) &&

      ((lhs_data_length == 0) ||

       (memcmp(lhs.data(), rhs.data(), lhs_data_length) == 0))) {

    return 0;

  }


  auto lhs_data = lhs.data();

  auto rhs_data = rhs.data();


  size_t lhs_length = 0;

  size_t rhs_length = 0;


  /* Note: Using if-s instead of switch due to bug mentioned in hash(). */


  if (m_mode == Mode::BINARY) {

    return const_cast<Field *>(m_mysql_field)->key_cmp(lhs_data, rhs_data);

  } else if (m_mode == Mode::CHARSET) {

    lhs_length = lhs_data_length;

    rhs_length = rhs_data_length;

  } else if (m_mode == Mode::CHARSET_AND_CHAR_LENGTH) {

    lhs_length = std::min(

        static_cast<size_t>(lhs_data_length),

        my_charpos(m_cs, lhs_data, lhs_data + lhs_data_length, m_char_length));

    rhs_length = std::min(

        static_cast<size_t>(rhs_data_length),

        my_charpos(m_cs, rhs_data, rhs_data + rhs_data_length, m_char_length));

  } else {

    my_abort();

  }


  /* If the field is space padded but collation do not want to use

   * the padding it is required to strip the spaces from the end. */

  if (m_is_space_padded && (m_cs->pad_attribute == NO_PAD)) {

    /* Strip trailing spaces. */

    lhs_length = m_cs->cset->lengthsp(

        m_cs, reinterpret_cast<const char *>(lhs_data), lhs_length);

    rhs_length = m_cs->cset->lengthsp(

        m_cs, reinterpret_cast<const char *>(rhs_data), rhs_length);

  }


  return m_cs->coll->strnncollsp(m_cs, lhs_data, lhs_length, rhs_data,

                                 rhs_length);

}


/**

   Convenience function to get the hash value of 0.0.

*/

inline size_t Cell_calculator::zero_hash() {

  // It's ok to have a common hash value for both 0.0 of type float and 0.0 of

  // type double. Use (double)0.0 for the same.

  uchar float_data[8];


  float8store(float_data, 0.0);

  return murmur3_32(float_data, 8, 0);

}


} /* namespace temptable */


#endif /* TEMPTABLE_CELL_CALCULATOR_H */

float4get
static float float4get(const uchar *M)
Definition: big_endian.h:110

cell.h
TempTable Cell declaration.

Field
Definition: field.h:573

Field::charset_for_protocol
const CHARSET_INFO * charset_for_protocol() const
Definition: field.h:1596

Field::key_type
virtual enum ha_base_keytype key_type() const
Definition: field.h:1163

Field::is_flag_set
bool is_flag_set(unsigned flag) const
Definition: field.h:750

KEY_PART_INFO
Definition: key.h:57

KEY_PART_INFO::length
uint16 length
Definition: key.h:63

KEY_PART_INFO::key_part_flag
uint16 key_part_flag
Definition: key.h:72

temptable::Cell_calculator
Utility to perform calculations for a cell.
Definition: cell_calculator.h:44

temptable::Cell_calculator::field_charset
static const CHARSET_INFO * field_charset(const Field &field)
Definition: cell_calculator.h:158

temptable::Cell_calculator::m_char_length
uint32_t m_char_length
Length in number of characters.
Definition: cell_calculator.h:102

temptable::Cell_calculator::m_mode
Mode m_mode
Calculation mode.
Definition: cell_calculator.h:92

temptable::Cell_calculator::zero_hash
static size_t zero_hash()
Convenience function to get the hash value of 0.0.
Definition: cell_calculator.h:314

temptable::Cell_calculator::s_zero_hash
static const size_t s_zero_hash
This value is to be used for hashing 0 value for approximate types such as float or double.
Definition: cell_calculator.h:107

temptable::Cell_calculator::Cell_calculator
Cell_calculator()=default
Default constructor used for std::array initialization in Index.

temptable::Cell_calculator::Mode
Mode
Definition: cell_calculator.h:77

temptable::Cell_calculator::Mode::BINARY
@ BINARY

temptable::Cell_calculator::Mode::CHARSET
@ CHARSET

temptable::Cell_calculator::Mode::CHARSET_AND_CHAR_LENGTH
@ CHARSET_AND_CHAR_LENGTH

temptable::Cell_calculator::hash
size_t hash(const Cell &cell) const
Calculate hash value for a cell.
Definition: cell_calculator.h:177

temptable::Cell_calculator::m_is_floating_point
bool m_is_floating_point
True if the key is of type double or float.
Definition: cell_calculator.h:95

temptable::Cell_calculator::m_cs
const CHARSET_INFO * m_cs
Charset used by calculator.
Definition: cell_calculator.h:89

temptable::Cell_calculator::m_mysql_field
const Field * m_mysql_field
Field for which this calculator was created.
Definition: cell_calculator.h:86

temptable::Cell_calculator::m_is_space_padded
bool m_is_space_padded
True if the cell is right-padded with spaces (CHAR column).
Definition: cell_calculator.h:98

temptable::Cell_calculator::compare
int compare(const Cell &lhs, const Cell &rhs) const
Compare two cells.
Definition: cell_calculator.h:245

temptable::Cell
A cell is the intersection of a row and a column.
Definition: cell.h:42

temptable::Cell::data
const unsigned char * data() const
Get a pointer to the user data inside the row.
Definition: cell.h:88

temptable::Cell::is_null
bool is_null() const
Check if this cell is NULL.
Definition: cell.h:84

temptable::Cell::data_length
uint32_t data_length() const
Get the length of the user data.
Definition: cell.h:86

ENUM_FLAG
#define ENUM_FLAG
field is an enum
Definition: mysql_com.h:164

SET_FLAG
#define SET_FLAG
field is a set
Definition: mysql_com.h:167

key_cmp
int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length, bool is_reverse_multi_valued_index_scan)
Compare key in record buffer to a given key.
Definition: key.cc:457

key.h

m_ctype.h
A better implementation of the UNIX ctype(3) library.

my_charset_bin
MYSQL_STRINGS_EXPORT CHARSET_INFO my_charset_bin
Definition: ctype-bin.cc:499

my_charpos
size_t my_charpos(const CHARSET_INFO *cs, const char *beg, const char *end, size_t pos)
Definition: m_ctype.h:656

NO_PAD
@ NO_PAD
Definition: m_ctype.h:243

HA_KEYTYPE_VARBINARY2
@ HA_KEYTYPE_VARBINARY2
Definition: my_base.h:461

HA_KEYTYPE_VARTEXT2
@ HA_KEYTYPE_VARTEXT2
Definition: my_base.h:460

HA_KEYTYPE_FLOAT
@ HA_KEYTYPE_FLOAT
Definition: my_base.h:446

HA_KEYTYPE_DOUBLE
@ HA_KEYTYPE_DOUBLE
Definition: my_base.h:447

HA_KEYTYPE_VARTEXT1
@ HA_KEYTYPE_VARTEXT1
Definition: my_base.h:457

HA_KEYTYPE_VARBINARY1
@ HA_KEYTYPE_VARBINARY1
Definition: my_base.h:458

HA_KEYTYPE_TEXT
@ HA_KEYTYPE_TEXT
Definition: my_base.h:442

HA_PART_KEY_SEG
#define HA_PART_KEY_SEG
Definition: my_base.h:563

float8store
void float8store(char *V, double M)
Definition: my_byteorder.h:210

float8get
double float8get(const char *M)
Definition: my_byteorder.h:206

my_dbug.h

DBUG_EXECUTE_IF
#define DBUG_EXECUTE_IF(keyword, a1)
Definition: my_dbug.h:171

my_abort
void my_abort()
Calls our own implementation of abort, if specified, or std's abort().
Definition: my_init.cc:266

uchar
unsigned char uchar
Definition: my_inttypes.h:52

uint64
uint64_t uint64
Definition: my_inttypes.h:69

my_murmur3.h

murmur3_32
uint32 murmur3_32(const uchar *key, size_t len, uint32 seed)
Compute 32-bit version of MurmurHash3 hash for the key.
Definition: my_murmur3.cc:86

gis::length
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:76

http::base::method::key_type
int key_type
Definition: method.h:38

temptable
Definition: allocator.h:48

field.h

CHARSET_INFO
Definition: m_ctype.h:421

CHARSET_INFO::mbmaxlen
unsigned mbmaxlen
Definition: m_ctype.h:445

CHARSET_INFO::coll
MY_COLLATION_HANDLER * coll
Definition: m_ctype.h:454

CHARSET_INFO::cset
MY_CHARSET_HANDLER * cset
Definition: m_ctype.h:453

CHARSET_INFO::pad_attribute
enum Pad_attribute pad_attribute
If this collation is PAD_SPACE, it collates as if all inputs were padded with a given number of space...
Definition: m_ctype.h:463

MY_CHARSET_HANDLER::lengthsp
size_t(* lengthsp)(const CHARSET_INFO *, const char *ptr, size_t length)
Given a pointer and a length in bytes, returns a new length in bytes where all trailing space charact...
Definition: m_ctype.h:372

MY_COLLATION_HANDLER::hash_sort
void(* hash_sort)(const CHARSET_INFO *cs, const uint8_t *key, size_t len, uint64_t *nr1, uint64_t *nr2)
Compute a sort hash for the given key.
Definition: m_ctype.h:326

MY_COLLATION_HANDLER::strnncollsp
int(* strnncollsp)(const CHARSET_INFO *, const uint8_t *, size_t, const uint8_t *, size_t)
Compare the two strings under the pad rules given by the collation.
Definition: m_ctype.h:261