MySQL 8.4.0
Source Code Documentation
cell_calculator.h
Go to the documentation of this file.
1/* Copyright (c) 2018, 2024, Oracle and/or its affiliates.
2
3This program is free software; you can redistribute it and/or modify it under
4the terms of the GNU General Public License, version 2.0, as published by the
5Free Software Foundation.
6
7This program is designed to work with certain software (including
8but not limited to OpenSSL) that is licensed under separate terms,
9as designated in a particular file or component or in included license
10documentation. The authors of MySQL hereby grant you an additional
11permission to link the program and your derivative works with the
12separately licensed software that they have either included with
13the program or referenced in the documentation.
14
15This program is distributed in the hope that it will be useful, but WITHOUT
16ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
18for more details.
19
20You should have received a copy of the GNU General Public License along with
21this program; if not, write to the Free Software Foundation, Inc.,
2251 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/** @file storage/temptable/include/temptable/cell_calculator.h
25TempTable Cell_calculator declaration. */
26
27#ifndef TEMPTABLE_CELL_CALCULATOR_H
28#define TEMPTABLE_CELL_CALCULATOR_H
29
30#include <algorithm>
31#include <cstdint>
32
33#include "my_dbug.h"
34#include "my_murmur3.h"
36#include "sql/field.h"
37#include "sql/key.h"
39
40namespace temptable {
41
42/** Utility to perform calculations for a cell. It uses the cell's contents and
43a stored context that describes how to interpret the data. */
45 public:
46 /** Default constructor used for std::array initialization in Index. */
47 Cell_calculator() = default;
48
49 /** Constructor to be used when creating calculators for indexed columns. */
50 explicit Cell_calculator(
51 /** [in] Key part (indexed column) for which calculator is created. */
52 const KEY_PART_INFO &mysql_key_part);
53
54 /** Constructor to be used when creating calculators for columns when
55 comparing table rows. */
56 explicit Cell_calculator(
57 /** [in] Field (column) for which calculator is created. */
58 const Field *mysql_field);
59
60 /** Calculate hash value for a cell.
61 * @return a hash number */
62 size_t hash(
63 /** [in] Cell for which hash is to be calculated. */
64 const Cell &cell) const;
65
66 /** Compare two cells.
67 * @retval <0 if lhs < rhs
68 * @retval 0 if lhs == rhs
69 * @retval >0 if lhs > rhs */
70 int compare(
71 /** [in] First cell to compare. */
72 const Cell &lhs,
73 /** [in] Second cell to compare. */
74 const Cell &rhs) const;
75
76 private:
77 enum class Mode : uint8_t {
78 BINARY,
79 CHARSET,
81 };
82
83 static const CHARSET_INFO *field_charset(const Field &field);
84
85 /** Field for which this calculator was created. */
87
88 /** Charset used by calculator. NULL for binary mode. */
90
91 /** Calculation mode. */
93
94 /** True if the key is of type double or float */
96
97 /** True if the cell is right-padded with spaces (CHAR column). */
99
100 /** Length in number of characters.
101 * Only used in CHARSET_AND_CHAR_LENGTH mode. */
103
104 /** This value is used as the hash of the value 0 for approximate types such as
105 * float or double. Specifically, for such types we don't want to have
106 * separate hash entries for 0.0 and -0.0. */
107 static const size_t s_zero_hash;
108 static size_t zero_hash();
109};
110
111/* Implementation of inlined methods. */
112
114 : m_mysql_field(mysql_key_part.field),
115 m_cs(field_charset(*m_mysql_field)),
116 m_is_floating_point(m_mysql_field->key_type() == HA_KEYTYPE_FLOAT ||
117 m_mysql_field->key_type() == HA_KEYTYPE_DOUBLE),
118 m_is_space_padded(m_mysql_field->key_type() == HA_KEYTYPE_TEXT),
119 m_char_length(0) {
120 /* Mimic hp_hashnr() from storage/heap/hp_hash.c. */
121
122 if (m_cs != nullptr) {
123 /* Decide if we should use my_charpos. */
124 bool use_char_length = (m_cs->mbmaxlen > 1) &&
125 (mysql_key_part.key_part_flag & HA_PART_KEY_SEG);
126
127 DBUG_EXECUTE_IF("temptable_use_char_length", use_char_length = true;);
128
129 if (use_char_length) {
130 m_char_length = mysql_key_part.length / m_cs->mbmaxlen;
132 } else {
134 }
135 } else {
137 }
138}
139
/* Construct a calculator for a whole column (as opposed to a key part).
 * mysql_field is dereferenced unconditionally, so it must not be null.
 * The charset is chosen by field_charset(); the floating point and space
 * padded flags are derived from the field's key type. */
140inline Cell_calculator::Cell_calculator(const Field *mysql_field)
141 : m_mysql_field(mysql_field),
142 m_cs(field_charset(*m_mysql_field)),
143 m_is_floating_point(m_mysql_field->key_type() == HA_KEYTYPE_FLOAT ||
144 m_mysql_field->key_type() == HA_KEYTYPE_DOUBLE),
145 m_is_space_padded(m_mysql_field->key_type() == HA_KEYTYPE_TEXT),
146 m_char_length(0) {
147 /* Mimic hp_hashnr() from storage/heap/hp_hash.c. */
148
149 /* No partial keys here, so the CHARSET_AND_CHAR_LENGTH mode is never needed and m_char_length stays 0. */
150
151 if (m_cs != nullptr) {
153 } else {
155 }
156}
157
159 /* Decide if we should use charset+collation for comparisons, or rely on pure
160 * binary data. */
161 switch (field.key_type()) {
162 case HA_KEYTYPE_TEXT:
167 if (field.is_flag_set(ENUM_FLAG) || field.is_flag_set(SET_FLAG)) {
168 return &my_charset_bin;
169 } else {
170 return field.charset_for_protocol();
171 }
172 default:
173 return nullptr;
174 }
175}
176
/* Calculate the hash value of a cell.
 * NULL cells hash to the constant 1. In BINARY mode the raw bytes are hashed
 * with murmur3_32() using seed 0; otherwise the collation's hash_sort() is
 * applied to the (possibly shortened) data and its first accumulator is
 * returned. */
177inline size_t Cell_calculator::hash(const Cell &cell) const {
178 if (cell.is_null()) {
 /* All NULL cells share the same fixed hash value. */
179 return 1;
180 }
181
182 auto data_length = cell.data_length();
183 auto data = cell.data();
184
185 /* For approximate types, 0.0 and -0.0 may have different bit patterns. Treat
186 * all such patterns as belonging to a single value. */
188 assert(data_length == 4 || data_length == 8);
189 const double val = data_length == 4 ? float4get(data) : float8get(data);
190 if (val == 0.0) return s_zero_hash;
191 return murmur3_32(data, data_length, 0);
192 }
193
194 /*
195 * If the field's collation has the PAD_SPACE attribute, the empty string ''
196 * and a string of spaces ' ' compare as equal, so they must also produce the
197 * same hash value. Handling the empty string directly (returning 0) while
198 * hashing ' ' through the charset would give them different hash values.
199 * Therefore there is deliberately no shortcut that returns hash value 0
200 * when data_length == 0. */
201
202 size_t length = 0;
203
204 /*
205 switch (m_mode) {
206 case Mode::CHARSET:
207 length = ...
208 break;
209 case Mode::CHARSET_AND_CHAR_LENGTH:
210 length = ...
211 break;
212 case Mode::BINARY:
213 return ...
214 }
215 code <-- this is executed when
216 indexed_column.cell_hash_function() == Mode::BINARY
217 and compiled with "Studio 12.5 Sun C++ 5.14 SunOS_sparc 2016/05/31" !!!
218 So we use if-else instead of switch below. */
219
220 if (m_mode == Mode::BINARY) {
221 return murmur3_32(data, data_length, 0);
222 } else if (m_mode == Mode::CHARSET) {
223 length = data_length;
225 length =
226 std::min(static_cast<size_t>(data_length),
227 my_charpos(m_cs, data, data + data_length, m_char_length));
228 } else {
 /* Unknown mode: abort rather than produce a bogus hash. */
229 my_abort();
230 }
231
232 /* If the field is space padded but the collation does not want to use
233 * the padding, the trailing spaces must be stripped before hashing. */
235 length = m_cs->cset->lengthsp(m_cs, reinterpret_cast<const char *>(data),
236 length);
237 }
238
 /* Initial accumulator values handed to hash_sort(); only h1 is returned. */
239 uint64 h1 = 1;
240 uint64 h2 = 4;
241 m_cs->coll->hash_sort(m_cs, data, length, &h1, &h2);
242 return h1;
243}
244
/* Compare two cells.
 * NULL sorts before any non-NULL value and two NULLs compare equal. Cells
 * with byte-identical data compare equal without further work. Otherwise
 * BINARY mode delegates to the field's key_cmp(), and the charset modes
 * delegate to the collation's strnncollsp() on the (possibly shortened)
 * data. Returns <0, 0 or >0 for lhs < rhs, lhs == rhs, lhs > rhs. */
245inline int Cell_calculator::compare(const Cell &lhs, const Cell &rhs) const {
246 if (lhs.is_null()) {
247 if (rhs.is_null()) {
248 /* Both are NULL. */
249 return 0;
250 } else {
251 /* NULL < whatever (not NULL). */
252 return -1;
253 }
254 } else {
255 if (rhs.is_null()) {
256 /* whatever (not NULL) > NULL. */
257 return 1;
258 }
259 }
260
261 /* Both cells are not NULL. */
262 auto lhs_data_length = lhs.data_length();
263 auto rhs_data_length = rhs.data_length();
264
265 /* If both cells' data is identical, there is no need for the expensive
266 * comparisons below because we know that they will report equality. */
267 if ((lhs_data_length == rhs_data_length) &&
268 ((lhs_data_length == 0) ||
269 (memcmp(lhs.data(), rhs.data(), lhs_data_length) == 0))) {
270 return 0;
271 }
272
273 auto lhs_data = lhs.data();
274 auto rhs_data = rhs.data();
275
276 size_t lhs_length = 0;
277 size_t rhs_length = 0;
278
279 /* Note: Using if-s instead of switch due to bug mentioned in hash(). */
280
281 if (m_mode == Mode::BINARY) {
 /* m_mysql_field is a pointer to const; the cast is presumably needed
  * because Field::key_cmp() is not a const method - TODO confirm. */
282 return const_cast<Field *>(m_mysql_field)->key_cmp(lhs_data, rhs_data);
283 } else if (m_mode == Mode::CHARSET) {
284 lhs_length = lhs_data_length;
285 rhs_length = rhs_data_length;
287 lhs_length = std::min(
288 static_cast<size_t>(lhs_data_length),
289 my_charpos(m_cs, lhs_data, lhs_data + lhs_data_length, m_char_length));
290 rhs_length = std::min(
291 static_cast<size_t>(rhs_data_length),
292 my_charpos(m_cs, rhs_data, rhs_data + rhs_data_length, m_char_length));
293 } else {
 /* Unknown mode: abort rather than return a bogus ordering. */
294 my_abort();
295 }
296
297 /* If the field is space padded but the collation does not want to use
298 * the padding, the trailing spaces must be stripped before comparing. */
300 /* Strip trailing spaces. */
301 lhs_length = m_cs->cset->lengthsp(
302 m_cs, reinterpret_cast<const char *>(lhs_data), lhs_length);
303 rhs_length = m_cs->cset->lengthsp(
304 m_cs, reinterpret_cast<const char *>(rhs_data), rhs_length);
305 }
306
307 return m_cs->coll->strnncollsp(m_cs, lhs_data, lhs_length, rhs_data,
308 rhs_length);
309}
310
311/**
312 Convenience function to get the hash value of 0.0.
313*/
315 // It's ok to have a common hash value for both 0.0 of type float and 0.0 of
316 // type double, so the hash of (double)0.0 is used for both.
317 uchar float_data[8];
318
319 float8store(float_data, 0.0);
320 return murmur3_32(float_data, 8, 0);
321}
322
323} /* namespace temptable */
324
325#endif /* TEMPTABLE_CELL_CALCULATOR_H */
static float float4get(const uchar *M)
Definition: big_endian.h:110
TempTable Cell declaration.
Definition: field.h:575
const CHARSET_INFO * charset_for_protocol() const
Definition: field.h:1589
virtual enum ha_base_keytype key_type() const
Definition: field.h:1156
bool is_flag_set(unsigned flag) const
Definition: field.h:752
Definition: key.h:57
uint16 length
Definition: key.h:63
uint16 key_part_flag
Definition: key.h:72
Utility to perform calculations for a cell.
Definition: cell_calculator.h:44
static const CHARSET_INFO * field_charset(const Field &field)
Definition: cell_calculator.h:158
uint32_t m_char_length
Length in number of characters.
Definition: cell_calculator.h:102
Mode m_mode
Calculation mode.
Definition: cell_calculator.h:92
static size_t zero_hash()
Convenience function to get the hash value of 0.0.
Definition: cell_calculator.h:314
static const size_t s_zero_hash
This value is to be used for hashing 0 value for approximate types such as float or double.
Definition: cell_calculator.h:107
Cell_calculator()=default
Default constructor used for std::array initialization in Index.
Mode
Definition: cell_calculator.h:77
size_t hash(const Cell &cell) const
Calculate hash value for a cell.
Definition: cell_calculator.h:177
bool m_is_floating_point
True if the key is of type double or float.
Definition: cell_calculator.h:95
const CHARSET_INFO * m_cs
Charset used by calculator.
Definition: cell_calculator.h:89
const Field * m_mysql_field
Field for which this calculator was created.
Definition: cell_calculator.h:86
bool m_is_space_padded
True if the cell is right-padded with spaces (CHAR column).
Definition: cell_calculator.h:98
int compare(const Cell &lhs, const Cell &rhs) const
Compare two cells.
Definition: cell_calculator.h:245
A cell is the intersection of a row and a column.
Definition: cell.h:42
const unsigned char * data() const
Get a pointer to the user data inside the row.
Definition: cell.h:88
bool is_null() const
Check if this cell is NULL.
Definition: cell.h:84
uint32_t data_length() const
Get the length of the user data.
Definition: cell.h:86
#define ENUM_FLAG
field is an enum
Definition: mysql_com.h:164
#define SET_FLAG
field is a set
Definition: mysql_com.h:167
int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length)
Compare key in record buffer to a given key.
Definition: key.cc:454
A better implementation of the UNIX ctype(3) library.
MYSQL_STRINGS_EXPORT CHARSET_INFO my_charset_bin
Definition: ctype-bin.cc:509
size_t my_charpos(const CHARSET_INFO *cs, const char *beg, const char *end, size_t pos)
Definition: m_ctype.h:658
@ NO_PAD
Definition: m_ctype.h:244
@ HA_KEYTYPE_VARBINARY2
Definition: my_base.h:461
@ HA_KEYTYPE_VARTEXT2
Definition: my_base.h:460
@ HA_KEYTYPE_FLOAT
Definition: my_base.h:446
@ HA_KEYTYPE_DOUBLE
Definition: my_base.h:447
@ HA_KEYTYPE_VARTEXT1
Definition: my_base.h:457
@ HA_KEYTYPE_VARBINARY1
Definition: my_base.h:458
@ HA_KEYTYPE_TEXT
Definition: my_base.h:442
#define HA_PART_KEY_SEG
Definition: my_base.h:563
void float8store(char *V, double M)
Definition: my_byteorder.h:210
double float8get(const char *M)
Definition: my_byteorder.h:206
#define DBUG_EXECUTE_IF(keyword, a1)
Definition: my_dbug.h:171
void my_abort()
Calls our own implementation of abort, if specified, or std's abort().
Definition: my_init.cc:261
unsigned char uchar
Definition: my_inttypes.h:52
uint64_t uint64
Definition: my_inttypes.h:69
uint32 murmur3_32(const uchar *key, size_t len, uint32 seed)
Compute 32-bit version of MurmurHash3 hash for the key.
Definition: my_murmur3.cc:86
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:76
int key_type
Definition: method.h:38
Definition: allocator.h:45
Definition: m_ctype.h:423
unsigned mbmaxlen
Definition: m_ctype.h:447
MY_COLLATION_HANDLER * coll
Definition: m_ctype.h:456
MY_CHARSET_HANDLER * cset
Definition: m_ctype.h:455
enum Pad_attribute pad_attribute
If this collation is PAD_SPACE, it collates as if all inputs were padded with a given number of space...
Definition: m_ctype.h:465
size_t(* lengthsp)(const CHARSET_INFO *, const char *ptr, size_t length)
Given a pointer and a length in bytes, returns a new length in bytes where all trailing space charact...
Definition: m_ctype.h:374
void(* hash_sort)(const CHARSET_INFO *cs, const uint8_t *key, size_t len, uint64_t *nr1, uint64_t *nr2)
Compute a sort hash for the given key.
Definition: m_ctype.h:328
int(* strnncollsp)(const CHARSET_INFO *, const uint8_t *, size_t, const uint8_t *, size_t)
Compare the two strings under the pad rules given by the collation.
Definition: m_ctype.h:262