MySQL 8.1.0
Source Code Documentation
cell_calculator.h
Go to the documentation of this file.
1/* Copyright (c) 2018, 2023, Oracle and/or its affiliates.
2
3This program is free software; you can redistribute it and/or modify it under
4the terms of the GNU General Public License, version 2.0, as published by the
5Free Software Foundation.
6
7This program is also distributed with certain software (including but not
8limited to OpenSSL) that is licensed under separate terms, as designated in a
9particular file or component or in included license documentation. The authors
10of MySQL hereby grant you an additional permission to link the program and
11your derivative works with the separately licensed software that they have
12included with MySQL.
13
14This program is distributed in the hope that it will be useful, but WITHOUT
15ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
17for more details.
18
19You should have received a copy of the GNU General Public License along with
20this program; if not, write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23/** @file storage/temptable/include/temptable/cell_calculator.h
24TempTable Cell_calculator declaration. */
25
26#ifndef TEMPTABLE_CELL_CALCULATOR_H
27#define TEMPTABLE_CELL_CALCULATOR_H
28
29#include <algorithm>
30#include <cstdint>
31
32#include "my_dbug.h"
33#include "my_murmur3.h"
35#include "sql/field.h"
36#include "sql/key.h"
38
39namespace temptable {
40
41/** Utility to perform calculations for a cell. It uses cell's contents and
42a stored context that describes how to interpret the data.
 * Two calculations are offered: hashing (hash()) and three-way comparison
 * (compare()). */
44 public:
45 /** Default constructor used for std::array initialization in Index. */
46 Cell_calculator() = default;
47
48 /** Constructor to be used when creating calculators for indexed columns. */
49 explicit Cell_calculator(
50 /** [in] Key part (indexed column) for which calculator is created. */
51 const KEY_PART_INFO &mysql_key_part);
52
53 /** Constructor to be used when creating calculators for columns when
54 comparing table rows. */
55 explicit Cell_calculator(
56 /** [in] Field (column) for which calculator is created. Must not be
 * null: the constructor dereferences it. */
57 const Field *mysql_field);
58
59 /** Calculate hash value for a cell.
 * A NULL cell always hashes to the same constant, and for float/double keys
 * every zero value (0.0 and -0.0) hashes to s_zero_hash.
60 * @return a hash number */
61 size_t hash(
62 /** [in] Cell for which hash is to be calculated. */
63 const Cell &cell) const;
64
65 /** Compare two cells.
 * NULL is ordered before every non-NULL value and two NULLs are equal.
66 * @retval <0 if lhs < rhs
67 * @retval 0 if lhs == rhs
68 * @retval >0 if lhs > rhs */
69 int compare(
70 /** [in] First cell to compare. */
71 const Cell &lhs,
72 /** [in] Second cell to compare. */
73 const Cell &rhs) const;
74
75 private:
 /** How hash() and compare() interpret the contents of a cell. */
76 enum class Mode : uint8_t {
 /** Raw bytes: hash() runs murmur3_32() over the data and compare()
 * delegates to Field::key_cmp(). */
77 BINARY,
 /** The whole cell is hashed and compared through the charset in m_cs. */
78 CHARSET,
 /* NOTE(review): source line 79 is absent from this listing. Comments in
 * this file refer to a CHARSET_AND_CHAR_LENGTH mode, so presumably it is
 * declared here - confirm against the original header. */
80 };
81
 /** Select the charset to use when comparing values of a field.
 * @return the charset, or nullptr when the field is to be treated as pure
 * binary data */
82 static const CHARSET_INFO *field_charset(const Field &field);
83
84 /** Field for which this calculator was created. */
86
87 /** Charset used by calculator. NULL for binary mode. */
89
90 /** Calculation mode. */
92
93 /** True if the key is of type double or float. */
95
96 /** True if the cell is right-padded with spaces (CHAR column). */
98
99 /** Length in number of characters.
100 * Only used in CHARSET_AND_CHAR_LENGTH mode. */
102
103 /** This value is to be used for hashing 0 value for approximate types such as
104 * float or double. Specifically, for such types we don't want to have
105 * separate hash entries for 0.0 and -0.0. */
106 static const size_t s_zero_hash;
 /** Convenience function to get the hash value of 0.0. */
107 static size_t zero_hash();
108};
109
110/* Implementation of inlined methods. */
111
113 : m_mysql_field(mysql_key_part.field),
 /* A null charset means the field is handled as pure binary data. */
114 m_cs(field_charset(*m_mysql_field)),
115 m_is_floating_point(m_mysql_field->key_type() == HA_KEYTYPE_FLOAT ||
116 m_mysql_field->key_type() == HA_KEYTYPE_DOUBLE),
 /* Only HA_KEYTYPE_TEXT keys are treated as right-padded with spaces. */
117 m_is_space_padded(m_mysql_field->key_type() == HA_KEYTYPE_TEXT),
118 m_char_length(0) {
119 /* Mimic hp_hashnr() from storage/heap/hp_hash.c. */
120
121 if (m_cs != nullptr) {
122 /* Decide if we should use my_charpos: only for charsets with
 * mbmaxlen > 1 on key parts that carry the HA_PART_KEY_SEG flag
 * (presumably a partial/prefix key part - confirm). */
123 bool use_char_length = (m_cs->mbmaxlen > 1) &&
124 (mysql_key_part.key_part_flag & HA_PART_KEY_SEG);
125
 /* Debug hook: the temptable_use_char_length keyword forces the
 * character-length path regardless of the check above. */
126 DBUG_EXECUTE_IF("temptable_use_char_length", use_char_length = true;);
127
128 if (use_char_length) {
 /* Turn the key part length into a character count by dividing by
 * mbmaxlen (assumes length is in bytes and mbmaxlen is the maximum
 * number of bytes per character - confirm). */
129 m_char_length = mysql_key_part.length / m_cs->mbmaxlen;
 /* NOTE(review): source lines 130, 132 and 135 are absent from this
 * listing, which leaves the branches below empty here. Presumably each
 * one assigns m_mode - confirm against the original header. */
131 } else {
133 }
134 } else {
136 }
137}
138
/** Build a calculator for a whole column, as used when comparing table rows.
 * mysql_field is dereferenced right away, so it must not be null. The
 * character length limit is always left at 0 by this constructor. */
139inline Cell_calculator::Cell_calculator(const Field *mysql_field)
140 : m_mysql_field(mysql_field),
 /* A null charset means the field is handled as pure binary data. */
141 m_cs(field_charset(*m_mysql_field)),
142 m_is_floating_point(m_mysql_field->key_type() == HA_KEYTYPE_FLOAT ||
143 m_mysql_field->key_type() == HA_KEYTYPE_DOUBLE),
 /* Only HA_KEYTYPE_TEXT keys are treated as right-padded with spaces. */
144 m_is_space_padded(m_mysql_field->key_type() == HA_KEYTYPE_TEXT),
145 m_char_length(0) {
146 /* Mimic hp_hashnr() from storage/heap/hp_hash.c. */
147
148 /* No partial keys, so no CHARSET_AND_CHAR_LENGTH here. */
149
 /* NOTE(review): source lines 151 and 153 are absent from this listing,
 * which leaves both branches empty here. Presumably they assign m_mode
 * depending on whether a charset was found - confirm against the original
 * header. */
150 if (m_cs != nullptr) {
152 } else {
154 }
155}
156
158 /* Decide if we should use charset+collation for comparisons, or rely on pure
159 * binary data. The decision is driven by the key type of the field; a null
 * result tells the caller to treat the data as binary. */
160 switch (field.key_type()) {
161 case HA_KEYTYPE_TEXT:
 /* NOTE(review): source lines 162-165 are absent from this listing;
 * presumably they hold further case labels that share this branch -
 * confirm against the original header. */
 /* ENUM and SET columns use the binary charset; every other field that
 * reaches this branch uses its charset_for_protocol(). */
166 if (field.is_flag_set(ENUM_FLAG) || field.is_flag_set(SET_FLAG)) {
167 return &my_charset_bin;
168 } else {
169 return field.charset_for_protocol();
170 }
171 default:
 /* All remaining key types: no charset. */
172 return nullptr;
173 }
174}
175
/** Calculate the hash value of a cell.
 * A NULL cell hashes to 1. In binary mode the raw bytes are hashed with
 * murmur3_32() using seed 0; otherwise the (possibly shortened) data is
 * hashed through the hash_sort() function of the collation in m_cs.
 * @param[in] cell Cell for which hash is to be calculated.
 * @return a hash number */
176inline size_t Cell_calculator::hash(const Cell &cell) const {
177 if (cell.is_null()) {
178 return 1;
179 }
180
181 auto data_length = cell.data_length();
182 auto data = cell.data();
183
184 /* For approximate types, 0.0 and -0.0 may have different bit patterns. Treat
185 * all such patterns as belonging to a single value. */
 /* NOTE(review): source line 186, which opens the block closed on line 191,
 * is absent from this listing; presumably it tests m_is_floating_point -
 * confirm against the original header. */
 /* The data is expected to be exactly 4 bytes (read with float4get) or
 * 8 bytes (read with float8get). */
187 assert(data_length == 4 || data_length == 8);
188 const double val = data_length == 4 ? float4get(data) : float8get(data);
189 if (val == 0.0) return s_zero_hash;
190 return murmur3_32(data, data_length, 0);
191 }
192
193 /*
194 * For collations with the PAD_SPACE attribute the empty string '' must be
195 * equal to a string of spaces ' ', so both must produce the same hash value.
196 * Handling the empty string directly (returning 0) while hashing ' ' through
197 * the charset would give them different hash values. Therefore zero-length
198 * data is deliberately not special-cased: do not return hash value 0 if
199 * data_length == 0. */
200
 /* Number of bytes of data that take part in the charset-based hash. */
201 size_t length = 0;
202
203 /*
204 switch (m_mode) {
205 case Mode::CHARSET:
206 length = ...
207 break;
208 case Mode::CHARSET_AND_CHAR_LENGTH:
209 length = ...
210 break;
211 case Mode::BINARY:
212 return ...
213 }
214 code <-- this is executed when
215 indexed_column.cell_hash_function() == Mode::BINARY
216 and compiled with "Studio 12.5 Sun C++ 5.14 SunOS_sparc 2016/05/31" !!!
217 So we use if-else instead of switch below. */
218
219 if (m_mode == Mode::BINARY) {
220 return murmur3_32(data, data_length, 0);
221 } else if (m_mode == Mode::CHARSET) {
222 length = data_length;
 /* NOTE(review): source line 223 is absent from this listing; presumably it
 * is the else-if for the CHARSET_AND_CHAR_LENGTH mode - confirm against the
 * original header. The statement below caps the length at the byte offset
 * that my_charpos() reports for m_char_length characters. */
224 length =
225 std::min(static_cast<size_t>(data_length),
226 my_charpos(m_cs, data, data + data_length, m_char_length));
227 } else {
 /* Unknown mode: abort. */
228 my_abort();
229 }
230
231 /* If the field is space padded but the collation does not want to use
232 * the padding, it is required to strip the spaces from the end. */
 /* NOTE(review): source line 233, which opens the block closed on line 236,
 * is absent from this listing; presumably it tests m_is_space_padded and
 * the pad attribute of the collation - confirm against the original
 * header. */
234 length = m_cs->cset->lengthsp(m_cs, reinterpret_cast<const char *>(data),
235 length);
236 }
237
 /* h1 and h2 are the in/out values handed to hash_sort(); only h1 is used
 * as the result. */
238 uint64 h1 = 1;
239 uint64 h2 = 4;
240 m_cs->coll->hash_sort(m_cs, data, length, &h1, &h2);
241 return h1;
242}
243
/** Three-way comparison of two cells.
 * NULL is ordered before every non-NULL value and two NULLs are equal.
 * Byte-identical cells are reported equal without further work. Otherwise
 * binary mode delegates to Field::key_cmp() and the charset modes use the
 * strnncollsp() function of the collation in m_cs.
 * @param[in] lhs First cell to compare.
 * @param[in] rhs Second cell to compare.
 * @retval <0 if lhs < rhs
 * @retval 0 if lhs == rhs
 * @retval >0 if lhs > rhs */
244inline int Cell_calculator::compare(const Cell &lhs, const Cell &rhs) const {
245 if (lhs.is_null()) {
246 if (rhs.is_null()) {
247 /* Both are NULL. */
248 return 0;
249 } else {
250 /* NULL < whatever (not NULL). */
251 return -1;
252 }
253 } else {
254 if (rhs.is_null()) {
255 /* whatever (not NULL) > NULL. */
256 return 1;
257 }
258 }
259
260 /* Both cells are not NULL. */
261 auto lhs_data_length = lhs.data_length();
262 auto rhs_data_length = rhs.data_length();
263
264 /* If both cells' data is identical, then no need to use the expensive
265 * comparisons below because we know that they will report equality.
 * memcmp is skipped entirely when both lengths are 0. */
266 if ((lhs_data_length == rhs_data_length) &&
267 ((lhs_data_length == 0) ||
268 (memcmp(lhs.data(), rhs.data(), lhs_data_length) == 0))) {
269 return 0;
270 }
271
272 auto lhs_data = lhs.data();
273 auto rhs_data = rhs.data();
274
 /* Number of bytes of each side that take part in the charset comparison. */
275 size_t lhs_length = 0;
276 size_t rhs_length = 0;
277
278 /* Note: Using if-s instead of switch due to bug mentioned in hash(). */
279
280 if (m_mode == Mode::BINARY) {
 /* m_mysql_field points to a const Field, hence the const_cast
 * (presumably Field::key_cmp() is not a const member - confirm). */
281 return const_cast<Field *>(m_mysql_field)->key_cmp(lhs_data, rhs_data);
282 } else if (m_mode == Mode::CHARSET) {
283 lhs_length = lhs_data_length;
284 rhs_length = rhs_data_length;
 /* NOTE(review): source line 285 is absent from this listing; presumably it
 * is the else-if for the CHARSET_AND_CHAR_LENGTH mode - confirm against the
 * original header. The statements below cap each length at the byte offset
 * that my_charpos() reports for m_char_length characters. */
286 lhs_length = std::min(
287 static_cast<size_t>(lhs_data_length),
288 my_charpos(m_cs, lhs_data, lhs_data + lhs_data_length, m_char_length));
289 rhs_length = std::min(
290 static_cast<size_t>(rhs_data_length),
291 my_charpos(m_cs, rhs_data, rhs_data + rhs_data_length, m_char_length));
292 } else {
 /* Unknown mode: abort. */
293 my_abort();
294 }
295
296 /* If the field is space padded but the collation does not want to use
297 * the padding, it is required to strip the spaces from the end. */
 /* NOTE(review): source line 298, which opens the block closed on line 304,
 * is absent from this listing; presumably it tests m_is_space_padded and
 * the pad attribute of the collation - confirm against the original
 * header. */
299 /* Strip trailing spaces. */
300 lhs_length = m_cs->cset->lengthsp(
301 m_cs, reinterpret_cast<const char *>(lhs_data), lhs_length);
302 rhs_length = m_cs->cset->lengthsp(
303 m_cs, reinterpret_cast<const char *>(rhs_data), rhs_length);
304 }
305
 /* Collation-aware comparison of the two (possibly shortened) strings. */
306 return m_cs->coll->strnncollsp(m_cs, lhs_data, lhs_length, rhs_data,
307 rhs_length);
308}
309
310/**
311 Convenience function to get the hash value of 0.0.
 @return murmur3_32() hash, with seed 0, of the 8 bytes that float8store()
 writes for 0.0
312*/
 /* NOTE(review): source line 313, the signature of this function, is absent
 * from this listing - confirm against the original header. */
314 // It's ok to have a common hash value for both 0.0 of type float and 0.0 of
315 // type double. Use the 8-byte encoding of (double)0.0 for both.
316 uchar float_data[8];
317
318 float8store(float_data, 0.0);
319 return murmur3_32(float_data, 8, 0);
320}
321
322} /* namespace temptable */
323
324#endif /* TEMPTABLE_CELL_CALCULATOR_H */
static float float4get(const uchar *M)
Definition: big_endian.h:109
TempTable Cell declaration.
Definition: field.h:575
const CHARSET_INFO * charset_for_protocol() const
Definition: field.h:1561
virtual enum ha_base_keytype key_type() const
Definition: field.h:1153
bool is_flag_set(unsigned flag) const
Definition: field.h:749
Definition: key.h:56
uint16 length
Definition: key.h:62
uint16 key_part_flag
Definition: key.h:71
Utility to perform calculations for a cell.
Definition: cell_calculator.h:43
static const CHARSET_INFO * field_charset(const Field &field)
Definition: cell_calculator.h:157
uint32_t m_char_length
Length in number of characters.
Definition: cell_calculator.h:101
Mode m_mode
Calculation mode.
Definition: cell_calculator.h:91
static size_t zero_hash()
Convenience function to get the hash value of 0.0.
Definition: cell_calculator.h:313
static const size_t s_zero_hash
This value is to be used for hashing 0 value for approximate types such as float or double.
Definition: cell_calculator.h:106
Cell_calculator()=default
Default constructor used for std::array initialization in Index.
Mode
Definition: cell_calculator.h:76
size_t hash(const Cell &cell) const
Calculate hash value for a cell.
Definition: cell_calculator.h:176
bool m_is_floating_point
True if the key is of type double or float.
Definition: cell_calculator.h:94
const CHARSET_INFO * m_cs
Charset used by calculator.
Definition: cell_calculator.h:88
const Field * m_mysql_field
Field for which this calculator was created.
Definition: cell_calculator.h:85
bool m_is_space_padded
True if the cell is right-padded with spaces (CHAR column).
Definition: cell_calculator.h:97
int compare(const Cell &lhs, const Cell &rhs) const
Compare two cells.
Definition: cell_calculator.h:244
A cell is the intersection of a row and a column.
Definition: cell.h:41
const unsigned char * data() const
Get a pointer to the user data inside the row.
Definition: cell.h:87
bool is_null() const
Check if this cell is NULL.
Definition: cell.h:83
uint32_t data_length() const
Get the length of the user data.
Definition: cell.h:85
#define ENUM_FLAG
field is an enum
Definition: mysql_com.h:163
#define SET_FLAG
field is a set
Definition: mysql_com.h:166
int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length)
Compare key in record buffer to a given key.
Definition: key.cc:453
A better implementation of the UNIX ctype(3) library.
MYSQL_STRINGS_EXPORT CHARSET_INFO my_charset_bin
Definition: ctype-bin.cc:508
size_t my_charpos(const CHARSET_INFO *cs, const char *beg, const char *end, size_t pos)
Definition: m_ctype.h:660
@ NO_PAD
Definition: m_ctype.h:243
@ HA_KEYTYPE_VARBINARY2
Definition: my_base.h:459
@ HA_KEYTYPE_VARTEXT2
Definition: my_base.h:458
@ HA_KEYTYPE_FLOAT
Definition: my_base.h:444
@ HA_KEYTYPE_DOUBLE
Definition: my_base.h:445
@ HA_KEYTYPE_VARTEXT1
Definition: my_base.h:455
@ HA_KEYTYPE_VARBINARY1
Definition: my_base.h:456
@ HA_KEYTYPE_TEXT
Definition: my_base.h:440
#define HA_PART_KEY_SEG
Definition: my_base.h:561
void float8store(char *V, double M)
Definition: my_byteorder.h:202
double float8get(const char *M)
Definition: my_byteorder.h:198
#define DBUG_EXECUTE_IF(keyword, a1)
Definition: my_dbug.h:170
void my_abort()
Calls our own implementation of abort, if specified, or std's abort().
Definition: my_init.cc:260
unsigned char uchar
Definition: my_inttypes.h:51
uint64_t uint64
Definition: my_inttypes.h:68
uint32 murmur3_32(const uchar *key, size_t len, uint32 seed)
Compute 32-bit version of MurmurHash3 hash for the key.
Definition: my_murmur3.cc:85
int key_type
Definition: http_request.h:49
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:75
Definition: allocator.h:44
Definition: m_ctype.h:422
unsigned mbmaxlen
Definition: m_ctype.h:446
MY_COLLATION_HANDLER * coll
Definition: m_ctype.h:455
MY_CHARSET_HANDLER * cset
Definition: m_ctype.h:454
enum Pad_attribute pad_attribute
If this collation is PAD_SPACE, it collates as if all inputs were padded with a given number of space...
Definition: m_ctype.h:464
size_t(* lengthsp)(const CHARSET_INFO *, const char *ptr, size_t length)
Given a pointer and a length in bytes, returns a new length in bytes where all trailing space charact...
Definition: m_ctype.h:373
void(* hash_sort)(const CHARSET_INFO *cs, const uint8_t *key, size_t len, uint64_t *nr1, uint64_t *nr2)
Compute a sort hash for the given key.
Definition: m_ctype.h:327
int(* strnncollsp)(const CHARSET_INFO *, const uint8_t *, size_t, const uint8_t *, size_t)
Compare the two strings under the pad rules given by the collation.
Definition: m_ctype.h:261