MySQL 8.0.40
Source Code Documentation
cell_calculator.h
Go to the documentation of this file.
1/* Copyright (c) 2018, 2024, Oracle and/or its affiliates.
2
3This program is free software; you can redistribute it and/or modify it under
4the terms of the GNU General Public License, version 2.0, as published by the
5Free Software Foundation.
6
7This program is designed to work with certain software (including
8but not limited to OpenSSL) that is licensed under separate terms,
9as designated in a particular file or component or in included license
10documentation. The authors of MySQL hereby grant you an additional
11permission to link the program and your derivative works with the
12separately licensed software that they have either included with
13the program or referenced in the documentation.
14
15This program is distributed in the hope that it will be useful, but WITHOUT
16ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
18for more details.
19
20You should have received a copy of the GNU General Public License along with
21this program; if not, write to the Free Software Foundation, Inc.,
2251 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/** @file storage/temptable/include/temptable/cell_calculator.h
25TempTable Cell_calculator declaration. */
26
27#ifndef TEMPTABLE_CELL_CALCULATOR_H
28#define TEMPTABLE_CELL_CALCULATOR_H
29
30#include <algorithm>
31#include <cstdint>
32
33#include "m_ctype.h"
34#include "my_dbug.h"
35#include "my_murmur3.h"
36#include "sql/field.h"
37#include "sql/key.h"
39
40namespace temptable {
41
42/** Utility to perform calculations for a cell. It uses cell's contents and
43a stored context that describes how to interpret the data. */
45 public:
46 /** Default constructor used for std::array initialization in Index. */
47 Cell_calculator() = default;
48
49 /** Constructor to be used when creating calculators for indexed columns. */
50 explicit Cell_calculator(
51 /** [in] Key part (indexed column) for which calculator is created. */
52 const KEY_PART_INFO &mysql_key_part);
53
54 /** Constructor to be used when creating calculators for columns when
55 comparing table rows. */
56 explicit Cell_calculator(
57 /** [in] Field (column) for which calculator is created. */
58 const Field *mysql_field);
59
60 /** Calculate hash value for a cell.
61 * @return a hash number */
62 size_t hash(
63 /** [in] Cell for which hash is to be calculated. */
64 const Cell &cell) const;
65
66 /** Compare two cells.
67 * @retval <0 if lhs < rhs
68 * @retval 0 if lhs == rhs
69 * @retval >0 if lhs > rhs */
70 int compare(
71 /** [in] First cell to compare. */
72 const Cell &lhs,
73 /** [in] Second cell to compare. */
74 const Cell &rhs) const;
75
76 private:
77 enum class Mode : uint8_t {
78 BINARY,
79 CHARSET,
81 };
82
83 static const CHARSET_INFO *field_charset(const Field &field);
84
85 /** Field for which this calculator was created. */
87
88 /** Charset used by calculator. NULL for binary mode. */
90
91 /** Calculation mode. */
93
94 /** True if the cell is right-padded with spaces (CHAR column). */
96
97 /** Length in number of characters.
98 * Only used in CHARSET_AND_CHAR_LENGTH mode. */
99 uint32_t m_char_length;
100};
101
102/* Implementation of inlined methods. */
103
105 : m_mysql_field(mysql_key_part.field),
106 m_cs(field_charset(*m_mysql_field)),
107 m_is_space_padded(m_mysql_field->key_type() == HA_KEYTYPE_TEXT),
108 m_char_length(0) {
109 /* Mimic hp_hashnr() from storage/heap/hp_hash.c. */
110
111 if (m_cs != nullptr) {
112 /* Decide if we should use my_charpos. */
113 bool use_char_length = (m_cs->mbmaxlen > 1) &&
114 (mysql_key_part.key_part_flag & HA_PART_KEY_SEG);
115
116 DBUG_EXECUTE_IF("temptable_use_char_length", use_char_length = true;);
117
118 if (use_char_length) {
119 m_char_length = mysql_key_part.length / m_cs->mbmaxlen;
121 } else {
123 }
124 } else {
126 }
127}
128
129inline Cell_calculator::Cell_calculator(const Field *mysql_field)
130 : m_mysql_field(mysql_field),
131 m_cs(field_charset(*m_mysql_field)),
132 m_is_space_padded(m_mysql_field->key_type() == HA_KEYTYPE_TEXT),
133 m_char_length(0) {
134 /* Mimic hp_hashnr() from storage/heap/hp_hash.c. */
135
136 /* No partial keys, so no CHARSET_AND_CHAR_LENGTH here. */
137
138 if (m_cs != nullptr) {
140 } else {
142 }
143}
144
146 /* Decide if we should use charset+collation for comparisons, or rely on pure
147 * binary data. */
148 switch (field.key_type()) {
149 case HA_KEYTYPE_TEXT:
154 if (field.is_flag_set(ENUM_FLAG) || field.is_flag_set(SET_FLAG)) {
155 return &my_charset_bin;
156 } else {
157 return field.charset_for_protocol();
158 }
159 default:
160 return nullptr;
161 }
162}
163
164inline size_t Cell_calculator::hash(const Cell &cell) const {
165 if (cell.is_null()) {
166 return 1;
167 }
168
169 auto data_length = cell.data_length();
170 /*
171 * If the collation of field to calculate hash is with PAD_SPACE attribute,
172 * empty string '' and space ' ' will be calculated as different hash values,
173 * because we handle empty string '' directly (return 0), and calculate hash
174 * with cs for space ' '. But actually, for collations with PAD_SPACE
175 * attribute empty string '' should be equal with space ' '. Do not return
176 * hash value 0 if data_length == 0. */
177
178 auto data = cell.data();
179
180 size_t length = 0;
181
182 /*
183 switch (m_mode) {
184 case Mode::CHARSET:
185 length = ...
186 break;
187 case Mode::CHARSET_AND_CHAR_LENGTH:
188 length = ...
189 break;
190 case Mode::BINARY:
191 return ...
192 }
193 code <-- this is executed when
194 indexed_column.cell_hash_function() == Mode::BINARY
195 and compiled with "Studio 12.5 Sun C++ 5.14 SunOS_sparc 2016/05/31" !!!
196 So we use if-else instead of switch below. */
197
198 if (m_mode == Mode::BINARY) {
199 return murmur3_32(data, data_length, 0);
200 } else if (m_mode == Mode::CHARSET) {
201 length = data_length;
203 length =
204 std::min(static_cast<size_t>(data_length),
205 my_charpos(m_cs, data, data + data_length, m_char_length));
206 } else {
207 my_abort();
208 }
209
210 /* If the field is space padded but collation do not want to use
211 * the padding it is required to strip the spaces from the end. */
213 length = m_cs->cset->lengthsp(m_cs, reinterpret_cast<const char *>(data),
214 length);
215 }
216
217 uint64 h1 = 1;
218 uint64 h2 = 4;
219 m_cs->coll->hash_sort(m_cs, data, length, &h1, &h2);
220 return h1;
221}
222
223inline int Cell_calculator::compare(const Cell &lhs, const Cell &rhs) const {
224 if (lhs.is_null()) {
225 if (rhs.is_null()) {
226 /* Both are NULL. */
227 return 0;
228 } else {
229 /* NULL < whatever (not NULL). */
230 return -1;
231 }
232 } else {
233 if (rhs.is_null()) {
234 /* whatever (not NULL) > NULL. */
235 return 1;
236 }
237 }
238
239 /* Both cells are not NULL. */
240 auto lhs_data_length = lhs.data_length();
241 auto rhs_data_length = rhs.data_length();
242
243 /* If both cells' data is identical, then no need to use the expensive
244 * comparisons below because we know that they will report equality. */
245 if ((lhs_data_length == rhs_data_length) &&
246 ((lhs_data_length == 0) ||
247 (memcmp(lhs.data(), rhs.data(), lhs_data_length) == 0))) {
248 return 0;
249 }
250
251 auto lhs_data = lhs.data();
252 auto rhs_data = rhs.data();
253
254 size_t lhs_length = 0;
255 size_t rhs_length = 0;
256
257 /* Note: Using if-s instead of switch due to bug mentioned in hash(). */
258
259 if (m_mode == Mode::BINARY) {
260 return const_cast<Field *>(m_mysql_field)->key_cmp(lhs_data, rhs_data);
261 } else if (m_mode == Mode::CHARSET) {
262 lhs_length = lhs_data_length;
263 rhs_length = rhs_data_length;
265 lhs_length = std::min(
266 static_cast<size_t>(lhs_data_length),
267 my_charpos(m_cs, lhs_data, lhs_data + lhs_data_length, m_char_length));
268 rhs_length = std::min(
269 static_cast<size_t>(rhs_data_length),
270 my_charpos(m_cs, rhs_data, rhs_data + rhs_data_length, m_char_length));
271 } else {
272 my_abort();
273 }
274
275 /* If the field is space padded but collation do not want to use
276 * the padding it is required to strip the spaces from the end. */
278 /* Strip trailing spaces. */
279 lhs_length = m_cs->cset->lengthsp(
280 m_cs, reinterpret_cast<const char *>(lhs_data), lhs_length);
281 rhs_length = m_cs->cset->lengthsp(
282 m_cs, reinterpret_cast<const char *>(rhs_data), rhs_length);
283 }
284
285 return m_cs->coll->strnncollsp(m_cs, lhs_data, lhs_length, rhs_data,
286 rhs_length);
287}
288
289} /* namespace temptable */
290
291#endif /* TEMPTABLE_CELL_CALCULATOR_H */
TempTable Cell declaration.
Definition: field.h:575
const CHARSET_INFO * charset_for_protocol() const
Definition: field.h:1595
virtual enum ha_base_keytype key_type() const
Definition: field.h:1162
bool is_flag_set(unsigned flag) const
Definition: field.h:749
Definition: key.h:57
uint16 length
Definition: key.h:63
uint16 key_part_flag
Definition: key.h:72
Utility to perform calculations for a cell.
Definition: cell_calculator.h:44
static const CHARSET_INFO * field_charset(const Field &field)
Definition: cell_calculator.h:145
uint32_t m_char_length
Length in number of characters.
Definition: cell_calculator.h:99
Mode m_mode
Calculation mode.
Definition: cell_calculator.h:92
Cell_calculator()=default
Default constructor used for std::array initialization in Index.
Mode
Definition: cell_calculator.h:77
size_t hash(const Cell &cell) const
Calculate hash value for a cell.
Definition: cell_calculator.h:164
const CHARSET_INFO * m_cs
Charset used by calculator.
Definition: cell_calculator.h:89
const Field * m_mysql_field
Field for which this calculator was created.
Definition: cell_calculator.h:86
bool m_is_space_padded
True if the cell is right-padded with spaces (CHAR column).
Definition: cell_calculator.h:95
int compare(const Cell &lhs, const Cell &rhs) const
Compare two cells.
Definition: cell_calculator.h:223
A cell is the intersection of a row and a column.
Definition: cell.h:42
const unsigned char * data() const
Get a pointer to the user data inside the row.
Definition: cell.h:88
bool is_null() const
Check if this cell is NULL.
Definition: cell.h:84
uint32_t data_length() const
Get the length of the user data.
Definition: cell.h:86
#define ENUM_FLAG
field is an enum
Definition: mysql_com.h:164
#define SET_FLAG
field is a set
Definition: mysql_com.h:167
int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length)
Compare key in record buffer to a given key.
Definition: key.cc:453
A better implementation of the UNIX ctype(3) library.
MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin
Definition: ctype-bin.cc:511
#define my_charpos(cs, b, e, num)
Definition: m_ctype.h:719
@ NO_PAD
Definition: m_ctype.h:198
@ HA_KEYTYPE_VARBINARY2
Definition: my_base.h:460
@ HA_KEYTYPE_VARTEXT2
Definition: my_base.h:459
@ HA_KEYTYPE_VARTEXT1
Definition: my_base.h:456
@ HA_KEYTYPE_VARBINARY1
Definition: my_base.h:457
@ HA_KEYTYPE_TEXT
Definition: my_base.h:441
#define HA_PART_KEY_SEG
Definition: my_base.h:562
#define DBUG_EXECUTE_IF(keyword, a1)
Definition: my_dbug.h:171
void my_abort()
Calls our own implementation of abort, if specified, or std's abort().
Definition: my_init.cc:258
uint64_t uint64
Definition: my_inttypes.h:69
uint32 murmur3_32(const uchar *key, size_t len, uint32 seed)
Compute 32-bit version of MurmurHash3 hash for the key.
Definition: my_murmur3.cc:86
int key_type
Definition: http_request.h:50
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:76
Definition: allocator.h:45
Definition: m_ctype.h:385
uint mbmaxlen
Definition: m_ctype.h:409
MY_COLLATION_HANDLER * coll
Definition: m_ctype.h:418
MY_CHARSET_HANDLER * cset
Definition: m_ctype.h:417
enum Pad_attribute pad_attribute
If this collation is PAD_SPACE, it collates as if all inputs were padded with a given number of space...
Definition: m_ctype.h:427
size_t(* lengthsp)(const CHARSET_INFO *, const char *ptr, size_t length)
Given a pointer and a length in bytes, returns a new length in bytes where all trailing space charact...
Definition: m_ctype.h:333
void(* hash_sort)(const CHARSET_INFO *cs, const uchar *key, size_t len, uint64 *nr1, uint64 *nr2)
Compute a sort hash for the given key.
Definition: m_ctype.h:282
int(* strnncollsp)(const CHARSET_INFO *, const uchar *, size_t, const uchar *, size_t)
Compare the two strings under the pad rules given by the collation.
Definition: m_ctype.h:216