MySQL 8.0.40
Source Code Documentation
pack_rows.h
Go to the documentation of this file.
1#ifndef SQL_PACK_ROWS_H_
2#define SQL_PACK_ROWS_H_
3
4/* Copyright (c) 2020, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file
29
30 Generic routines for packing rows (possibly from multiple tables
31 at the same time) into strings, and then back again. Used for (at least)
32 hash join, BKA, and streaming aggregation.
33 */
34
35#include <assert.h>
36#include <stddef.h>
37#include <string.h>
38
39#include "field_types.h"
40#include "my_bitmap.h"
41#include "my_compiler.h"
42
43#include "my_inttypes.h"
44#include "my_table_map.h"
46#include "prealloced_array.h"
47#include "sql/field.h"
48#include "sql/handler.h"
49#include "sql/table.h"
50#include "template_utils.h"
51
52class JOIN;
53class String;
54
55// Names such as “Column” and “Table” are a tad too generic for the global
56// namespace.
57namespace pack_rows {
58
59/// A class that represents a field, which also holds a cached value of the
60/// field's data type.
61struct Column {
62 explicit Column(Field *field);
63 Field *const field;
64
65 // The field type is used frequently, and caching it gains around 30% in some
66 // of our microbenchmarks.
68};
69
70/// This struct is primarily used for holding the extracted columns in a hash
71/// join or BKA join, or the input columns in a streaming aggregation operation.
72/// When the join or aggregate iterator is constructed, we extract the columns
73/// that are needed to satisfy the SQL query.
74struct Table {
75 explicit Table(TABLE *table_arg);
78
79 // Whether to copy the NULL flags or not.
80 bool copy_null_flags{false};
81
82 // Whether to store the actual contents of NULL-complemented rows.
83 // This is needed by AggregateIterator in order to be able to
84 // restore the exact contents of the record buffer for a table
85 // accessed with EQRefIterator, so that the cache in EQRefIterator
86 // is not disturbed.
88};
89
90/// A structure that contains a list of input tables for a hash join operation,
91/// BKA join operation or a streaming aggregation operation, and some
92/// pre-computed properties for the tables.
94 public:
95 TableCollection() = default;
96
99 table_map tables_to_store_contents_of_null_rows_for);
100
101 const Prealloced_array<Table, 4> &tables() const { return m_tables; }
102
104
106
107 bool has_blob_column() const { return m_has_blob_column; }
108
109 bool store_rowids() const { return m_store_rowids; }
110
113 }
114
115 private:
116 void AddTable(TABLE *tab, bool store_contents_of_null_rows);
117
119
120 // We frequently use the bitmap to determine which side of the join an Item
121 // belongs to, so precomputing the bitmap saves quite some time.
123
124 // Sum of the NULL bytes and the row ID for all of the tables.
126
127 // Whether any of the tables has a BLOB/TEXT column. This is used to determine
128 // whether we need to estimate the row size every time we store a row to the
129 // row buffer or to a chunk file on disk. If this is set to false, we can
130 // pre-allocate any necessary buffers we need during the operation, and thus
131 // eliminate the need for recalculating the row size every time.
132 bool m_has_blob_column = false;
133
134 bool m_store_rowids = false;
136};
137
138/// Possible values of the NULL-row flag stored by StoreFromTableBuffers(). It
139/// tells whether or not a row is a NULL-complemented row in which all column
140/// values (including non-nullable columns) are NULL. Additionally, in case it
141/// is a NULL-complemented row, the flag contains information about whether the
142/// buffer contains the actual non-NULL values that were available in the record
143/// buffer at the time the row was stored, or if no column values are stored for
144/// the NULL-complemented row. Usually, no values are stored for
145/// NULL-complemented rows, but it may be necessary in order to avoid corrupting
146/// the internal cache of EQRefIterator. See Table::store_contents_of_null_rows.
// The numeric value of the flag is written verbatim (as one byte) into the
// packed row, so the order of the enumerators is part of the storage format.
enum class NullRowFlag {
  /// The row is not a NULL-complemented one.
  kNotNull,
  /// The row is NULL-complemented. No column values are stored in the buffer.
  kNullWithoutData,
  /// The row is NULL-complemented. The actual non-NULL values that were in the
  /// record buffer at the time StoreFromTableBuffers() was called, will however
  /// be available in the buffer.
  kNullWithData
};
157
158/// Count up how many bytes a single row from the given tables will occupy,
159/// in "packed" format. Note that this is an upper bound, so the length after
160/// calling Field::pack may very well be shorter than the size returned by this
161/// function.
162///
163/// The value returned from this function will sum up
164/// 1) The row-id if that is to be kept.
165/// 2) Size of the NULL flags. This includes:
166/// - Space for a NULL flag per nullable column.
167/// - Space for a NULL flag per nullable table (tables on the inner side of
168/// an outer join).
169/// 3) Size of the buffer returned by pack() on all columns marked in the
170/// read_set_internal.
171///
172/// Note that if any of the tables has a BLOB/TEXT column, this function looks
173/// at the data stored in the record buffers. This means that the function can
174/// not be called before reading any rows if tables.has_blob_column is true.
175size_t ComputeRowSizeUpperBound(const TableCollection &tables);
176
177/// Take the data marked for reading in "tables" and store it in the provided
178/// buffer. What data to store is determined by the read set of each table.
179/// Note that any existing data in "buffer" will be overwritten.
180///
181/// The output buffer will contain the following data for each table in
182/// "tables":
183///
184/// 1) NULL-row flag if the table is nullable.
185/// 2) NULL flags for each nullable column.
186/// 3) The actual data from the columns.
187/// 4) The row ID for each row. This is only stored if the optimizer requested
188/// row IDs when creating the TableCollection.
189///
190/// @retval true if error, false otherwise
191bool StoreFromTableBuffers(const TableCollection &tables, String *buffer);
192
193/// Take the data in "ptr" and put it back to the tables' record buffers.
194/// The tables must be _exactly_ the same as when the row was created.
195/// That is, it must contain the same tables in the same order, and the read set
196/// of each table must be identical when storing and restoring the row.
197/// If that's not the case, you will end up with undefined and unpredictable
198/// behavior.
199///
200/// Returns a pointer to where we ended reading.
201const uchar *LoadIntoTableBuffers(const TableCollection &tables,
202 const uchar *ptr);
203
204/// For each of the given tables, request that the row ID is filled in
205/// (the equivalent of calling file->position()) if needed.
206///
207/// @param tables All tables involved in the operation.
208/// @param tables_to_get_rowid_for A bitmap of which tables to actually
209/// get row IDs for. (A table needs to be in both sets to be processed.)
211 table_map tables_to_get_rowid_for);
212
214 table_map tables_to_get_rowid_for);
215
216inline bool ShouldCopyRowId(const TABLE *table) {
217 // It is not safe to copy the row ID if we have a NULL-complemented row; the
218 // value is undefined, or the buffer location can even be nullptr.
219 return !table->const_table && !(table->is_nullable() && table->null_row);
220}
221
223 uchar *dptr) {
224 for (const Table &tbl : tables.tables()) {
225 const TABLE *table = tbl.table;
226
227 NullRowFlag null_row_flag = NullRowFlag::kNotNull;
228 if (table->is_nullable()) {
229 if (table->has_null_row()) {
230 null_row_flag = tbl.store_contents_of_null_rows && table->has_row()
233 }
234 *dptr++ = static_cast<uchar>(null_row_flag);
235 if (null_row_flag == NullRowFlag::kNullWithData) {
236 assert(table->is_started());
237 // If we want to store the actual values in the table buffer for the
238 // NULL-complemented row, instead of the NULLs, we need to restore the
239 // original null flags first. We reset the flags after we have stored
240 // the column values.
241 tbl.table->restore_null_flags();
242 tbl.table->reset_null_row();
243 }
244 }
245
246 // Store the NULL flags.
247 if (tbl.copy_null_flags) {
248 memcpy(dptr, table->null_flags, table->s->null_bytes);
249 dptr += table->s->null_bytes;
250 }
251
252 for (const Column &column : tbl.columns) {
254 column.field->field_index()));
255 if (!column.field->is_null()) {
256 // Store the data in packed format. The packed format will also
257 // include the length of the data if needed.
258 dptr = column.field->pack(dptr);
259 }
260 }
261
262 if (null_row_flag == NullRowFlag::kNullWithData) {
263 // The null flags were changed in order to get the actual contents of the
264 // null row stored. Restore the original null flags.
265 tbl.table->set_null_row();
266 }
267
268 if (tables.store_rowids() && ShouldCopyRowId(table)) {
269 // Store the row ID, since it is needed by weedout.
270 memcpy(dptr, table->file->ref, table->file->ref_length);
271 dptr += table->file->ref_length;
272 }
273 }
274 return dptr;
275}
276
277} // namespace pack_rows
278
279#endif // SQL_PACK_ROWS_H_
Definition: field.h:575
TABLE * table
Pointer to TABLE object that owns this field.
Definition: field.h:681
uint16 field_index() const
Returns field index.
Definition: field.h:1835
bool is_null(ptrdiff_t row_offset=0) const
Check whether the full table's row is NULL or the Field has value NULL.
Definition: field.h:1224
virtual uchar * pack(uchar *to, const uchar *from, size_t max_length) const
Pack the field into a format suitable for storage and transfer.
Definition: field.cc:1871
Definition: sql_optimizer.h:133
A typesafe replacement for DYNAMIC_ARRAY.
Definition: prealloced_array.h:71
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:168
uchar * ref
Pointer to current row.
Definition: handler.h:4428
uint ref_length
Length of ref (1-8 or the clustered key length)
Definition: handler.h:4496
A structure that contains a list of input tables for a hash join operation, BKA join operation or a s...
Definition: pack_rows.h:93
Prealloced_array< Table, 4 > m_tables
Definition: pack_rows.h:118
void AddTable(TABLE *tab, bool store_contents_of_null_rows)
Definition: pack_rows.cc:78
table_map tables_to_get_rowid_for() const
Definition: pack_rows.h:111
bool has_blob_column() const
Definition: pack_rows.h:107
bool store_rowids() const
Definition: pack_rows.h:109
bool m_store_rowids
Definition: pack_rows.h:134
size_t ref_and_null_bytes_size() const
Definition: pack_rows.h:105
table_map m_tables_bitmap
Definition: pack_rows.h:122
table_map m_tables_to_get_rowid_for
Definition: pack_rows.h:135
size_t m_ref_and_null_bytes_size
Definition: pack_rows.h:125
table_map tables_bitmap() const
Definition: pack_rows.h:103
const Prealloced_array< Table, 4 > & tables() const
Definition: pack_rows.h:101
bool m_has_blob_column
Definition: pack_rows.h:132
This file contains the field type.
enum_field_types
Column types for MySQL.
Definition: field_types.h:53
static bool bitmap_is_set(const MY_BITMAP *map, uint bit)
Definition: my_bitmap.h:95
Header for compiler-dependent features.
#define ALWAYS_INLINE
Definition: my_compiler.h:110
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:52
uint64_t table_map
Definition: my_table_map.h:30
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:420
Definition: pack_rows.cc:36
void PrepareForRequestRowId(const Prealloced_array< Table, 4 > &tables, table_map tables_to_get_rowid_for)
Definition: pack_rows.cc:293
bool ShouldCopyRowId(const TABLE *table)
Definition: pack_rows.h:216
bool StoreFromTableBuffers(const TableCollection &tables, String *buffer)
Take the data marked for reading in "tables" and store it in the provided buffer.
Definition: pack_rows.cc:212
ALWAYS_INLINE uchar * StoreFromTableBuffersRaw(const TableCollection &tables, uchar *dptr)
Definition: pack_rows.h:222
size_t ComputeRowSizeUpperBound(const TableCollection &tables)
Count up how many bytes a single row from the given tables will occupy, in "packed" format.
Definition: pack_rows.cc:196
const uchar * LoadIntoTableBuffers(const TableCollection &tables, const uchar *ptr)
Take the data in "ptr" and put it back to the tables' record buffers.
Definition: pack_rows.cc:238
void RequestRowId(const Prealloced_array< Table, 4 > &tables, table_map tables_to_get_rowid_for)
For each of the given tables, request that the row ID is filled in (the equivalent of calling file->p...
Definition: pack_rows.cc:282
NullRowFlag
Possible values of the NULL-row flag stored by StoreFromTableBuffers().
Definition: pack_rows.h:147
@ kNullWithoutData
The row is NULL-complemented. No column values are stored in the buffer.
@ kNotNull
The row is not a NULL-complemented one.
@ kNullWithData
The row is NULL-complemented.
Performance schema instrumentation interface.
uint null_bytes
Definition: table.h:840
Definition: table.h:1399
uchar * null_flags
Pointer to the null flags of record[0].
Definition: table.h:1583
bool has_null_row() const
Definition: table.h:2093
bool null_row
Definition: table.h:1728
bool const_table
Definition: table.h:1750
MY_BITMAP read_set_internal
A bitmap of fields that are explicitly referenced by the query.
Definition: table.h:1648
bool is_nullable() const
Return whether table is nullable.
Definition: table.h:1995
bool is_started() const
Definition: table.h:2026
handler * file
Definition: table.h:1401
bool has_row() const
Definition: table.h:2090
TABLE_SHARE * s
Definition: table.h:1400
A class that represents a field, which also holds a cached value of the field's data type.
Definition: pack_rows.h:61
const enum_field_types field_type
Definition: pack_rows.h:67
Field *const field
Definition: pack_rows.h:63
Column(Field *field)
Definition: pack_rows.cc:38
This struct is primarily used for holding the extracted columns in a hash join or BKA join,...
Definition: pack_rows.h:74
Table(TABLE *table_arg)
Definition: pack_rows.cc:43
bool copy_null_flags
Definition: pack_rows.h:80
bool store_contents_of_null_rows
Definition: pack_rows.h:87
Prealloced_array< Column, 8 > columns
Definition: pack_rows.h:77
TABLE * table
Definition: pack_rows.h:76
#define PSI_NOT_INSTRUMENTED
Definition: validate_password_imp.cc:42