MySQL 9.1.0
Source Code Documentation
pack_rows.h
Go to the documentation of this file.
1#ifndef SQL_PACK_ROWS_H_
2#define SQL_PACK_ROWS_H_
3
4/* Copyright (c) 2020, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file
29
30 Generic routines for packing rows (possibly from multiple tables
31 at the same time) into strings, and then back again. Used for (at least)
32 hash join, BKA, and streaming aggregation.
33 */
34
35#include <assert.h>
36#include <stddef.h>
37#include <string.h>
38
39#include "field_types.h"
40#include "my_bitmap.h"
41#include "my_compiler.h"
42
43#include "my_inttypes.h"
44#include "my_table_map.h"
46#include "prealloced_array.h"
47#include "sql/field.h"
48#include "sql/handler.h"
49#include "sql/table.h"
50#include "template_utils.h"
51
52class JOIN;
53class String;
54
55// Names such as “Column” and “Table” are a tad too generic for the global
56// namespace.
57namespace pack_rows {
58
59/// A class that represents a field, which also holds a cached value of the
60/// field's data type.
61struct Column {
62 explicit Column(Field *field);
63 Field *const field;
64
65 // The field type is used frequently, and caching it gains around 30% in some
66 // of our microbenchmarks.
68};
69
70/// This struct is primarily used for holding the extracted columns in a hash
71/// join or BKA join, or the input columns in a streaming aggregation operation.
72/// When the join or aggregate iterator is constructed, we extract the columns
73/// that are needed to satisfy the SQL query.
74struct Table {
75 explicit Table(TABLE *table_arg);
78
79 // Whether to copy the NULL flags or not.
80 bool copy_null_flags{false};
81
82 // Whether to store the actual contents of NULL-complemented rows.
83 // This is needed by AggregateIterator in order to be able to
84 // restore the exact contents of the record buffer for a table
85 // accessed with EQRefIterator, so that the cache in EQRefIterator
86 // is not disturbed.
88};
89
90/// A structure that contains a list of input tables for a hash join operation,
91/// BKA join operation or a streaming aggregation operation, and some
92/// pre-computed properties for the tables.
94 public:
95 TableCollection() = default;
96
99 table_map tables_to_store_contents_of_null_rows_for);
100
101 const Prealloced_array<Table, 4> &tables() const { return m_tables; }
102
104
106
107 bool has_blob_column() const { return m_has_blob_column; }
108
109 bool store_rowids() const { return m_store_rowids; }
110
113 }
114
115 private:
116 void AddTable(TABLE *tab, bool store_contents_of_null_rows);
117
119
120 // We frequently use the bitmap to determine which side of the join an Item
121 // belongs to, so precomputing the bitmap saves quite some time.
123
124 // Sum of the NULL bytes and the row ID for all of the tables.
126
127 // Whether any of the tables has a BLOB/TEXT column. This is used to determine
128 // whether we need to estimate the row size every time we store a row to the
129 // row buffer or to a chunk file on disk. If this is set to false, we can
130 // pre-allocate any necessary buffers we need during the operation, and thus
131 // eliminate the need for recalculating the row size every time.
132 bool m_has_blob_column = false;
133
134 bool m_store_rowids = false;
136};
137
/// Possible values of the NULL-row flag stored by StoreFromTableBuffers(). It
/// tells whether or not a row is a NULL-complemented row in which all column
/// values (including non-nullable columns) are NULL. Additionally, in case it
/// is a NULL-complemented row, the flag contains information about whether the
/// buffer contains the actual non-NULL values that were available in the record
/// buffer at the time the row was stored, or if no column values are stored for
/// the NULL-complemented row. Usually, no values are stored for
/// NULL-complemented rows, but it may be necessary in order to avoid corrupting
/// the internal cache of EQRefIterator. See Table::store_contents_of_null_rows.
///
/// The flag is written to the packed row as a single byte, so the enumerator
/// values are part of the packed format.
enum class NullRowFlag {
  /// The row is not a NULL-complemented one.
  kNotNull,
  /// The row is NULL-complemented. No column values are stored in the buffer.
  kNullWithoutData,
  /// The row is NULL-complemented. The actual non-NULL values that were in the
  /// record buffer at the time StoreFromTableBuffers() was called, will however
  /// be available in the buffer.
  kNullWithData
};
157
158/// Count up how many bytes a single row from the given tables will occupy,
159/// in "packed" format. Note that this is an upper bound, so the length after
160/// calling Field::pack may very well be shorter than the size returned by this
161/// function.
162///
163/// The value returned from this function will sum up
164/// 1) The row-id if that is to be kept.
165/// 2) Size of the NULL flags. This includes:
166/// - Space for a NULL flag per nullable column.
167/// - Space for a NULL flag per nullable table (tables on the inner side of
168/// an outer join).
169/// 3) Size of the buffer returned by pack() on all columns marked in the
170/// \c read_set_internal.
171/// We do not necessarily have valid data in the table buffers, so we do not try
172/// to calculate size for blobs.
173size_t ComputeRowSizeUpperBoundSansBlobs(const TableCollection &tables);
174/// Similar to ComputeRowSizeUpperBoundSansBlobs, but will calculate blob size
175/// as well. To do this, we need to look at the data stored in the record
176/// buffers.
177/// \note{This means that the function cannot be called without making sure
178/// there is valid data in the table buffers.}
179size_t ComputeRowSizeUpperBound(const TableCollection &tables);
180
181/// Take the data marked for reading in "tables" and store it in the provided
182/// buffer. What data to store is determined by the read set of each table.
183/// Note that any existing data in "buffer" will be overwritten.
184///
185/// The output buffer will contain the following data for each table in
186/// "tables":
187///
188/// 1) NULL-row flag if the table is nullable.
189/// 2) NULL flags for each nullable column.
190/// 3) The actual data from the columns.
191/// 4) The row ID for each row. This is only stored if the optimizer requested
192/// row IDs when creating the TableCollection.
193///
194/// @retval true if error, false otherwise
195bool StoreFromTableBuffers(const TableCollection &tables, String *buffer);
196
197/// Take the data in "ptr" and put it back to the tables' record buffers.
198/// The tables must be _exactly_ the same as when the row was created.
199/// That is, it must contain the same tables in the same order, and the read set
200/// of each table must be identical when storing and restoring the row.
201/// If that's not the case, you will end up with undefined and unpredictable
202/// behavior.
203///
204/// Returns a pointer to where we ended reading.
205const uchar *LoadIntoTableBuffers(const TableCollection &tables,
206 const uchar *ptr);
207
208/// For each of the given tables, request that the row ID is filled in
209/// (the equivalent of calling file->position()) if needed.
210///
211/// @param tables All tables involved in the operation.
212/// @param tables_to_get_rowid_for A bitmap of which tables to actually
213/// get row IDs for. (A table needs to be in both sets to be processed.)
215 table_map tables_to_get_rowid_for);
216
218 table_map tables_to_get_rowid_for);
219
220inline bool ShouldCopyRowId(const TABLE *table) {
221 // It is not safe to copy the row ID if we have a NULL-complemented row; the
222 // value is undefined, or the buffer location can even be nullptr.
223 return !table->const_table && !(table->is_nullable() && table->null_row);
224}
225
227 uchar *dptr) {
228 for (const Table &tbl : tables.tables()) {
229 const TABLE *table = tbl.table;
230
231 NullRowFlag null_row_flag = NullRowFlag::kNotNull;
232 if (table->is_nullable()) {
233 if (table->has_null_row()) {
234 null_row_flag = tbl.store_contents_of_null_rows && table->has_row()
237 }
238 *dptr++ = static_cast<uchar>(null_row_flag);
239 if (null_row_flag == NullRowFlag::kNullWithData) {
240 assert(table->is_started());
241 // If we want to store the actual values in the table buffer for the
242 // NULL-complemented row, instead of the NULLs, we need to restore the
243 // original null flags first. We reset the flags after we have stored
244 // the column values.
245 tbl.table->restore_null_flags();
246 tbl.table->reset_null_row();
247 }
248 }
249
250 // Store the NULL flags.
251 if (tbl.copy_null_flags) {
252 memcpy(dptr, table->null_flags, table->s->null_bytes);
253 dptr += table->s->null_bytes;
254 }
255
256 for (const Column &column : tbl.columns) {
258 column.field->field_index()));
259 if (!column.field->is_null()) {
260 // Store the data in packed format. The packed format will also
261 // include the length of the data if needed.
262 dptr = column.field->pack(dptr);
263 }
264 }
265
266 if (null_row_flag == NullRowFlag::kNullWithData) {
267 // The null flags were changed in order to get the actual contents of the
268 // null row stored. Restore the original null flags.
269 tbl.table->set_null_row();
270 }
271
272 if (tables.store_rowids() && ShouldCopyRowId(table)) {
273 // Store the row ID, since it is needed by weedout.
274 memcpy(dptr, table->file->ref, table->file->ref_length);
275 dptr += table->file->ref_length;
276 }
277 }
278 return dptr;
279}
280
281} // namespace pack_rows
282
283#endif // SQL_PACK_ROWS_H_
Definition: field.h:577
TABLE * table
Pointer to TABLE object that owns this field.
Definition: field.h:683
uint16 field_index() const
Returns field index.
Definition: field.h:1840
bool is_null(ptrdiff_t row_offset=0) const
Check whether the full table's row is NULL or the Field has value NULL.
Definition: field.h:1229
virtual uchar * pack(uchar *to, const uchar *from, size_t max_length) const
Pack the field into a format suitable for storage and transfer.
Definition: field.cc:2033
Definition: sql_optimizer.h:133
A typesafe replacement for DYNAMIC_ARRAY.
Definition: prealloced_array.h:71
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:167
A structure that contains a list of input tables for a hash join operation, BKA join operation or a s...
Definition: pack_rows.h:93
Prealloced_array< Table, 4 > m_tables
Definition: pack_rows.h:118
void AddTable(TABLE *tab, bool store_contents_of_null_rows)
Definition: pack_rows.cc:78
table_map tables_to_get_rowid_for() const
Definition: pack_rows.h:111
bool has_blob_column() const
Definition: pack_rows.h:107
bool store_rowids() const
Definition: pack_rows.h:109
bool m_store_rowids
Definition: pack_rows.h:134
size_t ref_and_null_bytes_size() const
Definition: pack_rows.h:105
table_map m_tables_bitmap
Definition: pack_rows.h:122
table_map m_tables_to_get_rowid_for
Definition: pack_rows.h:135
size_t m_ref_and_null_bytes_size
Definition: pack_rows.h:125
table_map tables_bitmap() const
Definition: pack_rows.h:103
const Prealloced_array< Table, 4 > & tables() const
Definition: pack_rows.h:101
bool m_has_blob_column
Definition: pack_rows.h:132
This file contains the field type.
enum_field_types
Column types for MySQL Note: Keep include/mysql/components/services/bits/stored_program_bits....
Definition: field_types.h:55
static bool bitmap_is_set(const MY_BITMAP *map, uint bit)
Definition: my_bitmap.h:95
Header for compiler-dependent features.
#define ALWAYS_INLINE
Definition: my_compiler.h:99
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:52
uint64_t table_map
Definition: my_table_map.h:30
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:418
Definition: pack_rows.cc:36
void PrepareForRequestRowId(const Prealloced_array< Table, 4 > &tables, table_map tables_to_get_rowid_for)
Definition: pack_rows.cc:334
bool ShouldCopyRowId(const TABLE *table)
Definition: pack_rows.h:220
bool StoreFromTableBuffers(const TableCollection &tables, String *buffer)
Take the data marked for reading in "tables" and store it in the provided buffer.
Definition: pack_rows.cc:247
ALWAYS_INLINE uchar * StoreFromTableBuffersRaw(const TableCollection &tables, uchar *dptr)
Definition: pack_rows.h:226
size_t ComputeRowSizeUpperBound(const TableCollection &tables)
Similar to ComputeRowSizeUpperBoundSansBlobs, but will calculate blob size as well.
Definition: pack_rows.cc:207
const uchar * LoadIntoTableBuffers(const TableCollection &tables, const uchar *ptr)
Take the data in "ptr" and put it back to the tables' record buffers.
Definition: pack_rows.cc:273
void RequestRowId(const Prealloced_array< Table, 4 > &tables, table_map tables_to_get_rowid_for)
For each of the given tables, request that the row ID is filled in (the equivalent of calling file->p...
Definition: pack_rows.cc:323
NullRowFlag
Possible values of the NULL-row flag stored by StoreFromTableBuffers().
Definition: pack_rows.h:147
@ kNullWithoutData
The row is NULL-complemented. No column values are stored in the buffer.
@ kNotNull
The row is not a NULL-complemented one.
@ kNullWithData
The row is NULL-complemented.
size_t ComputeRowSizeUpperBoundSansBlobs(const TableCollection &tables)
Count up how many bytes a single row from the given tables will occupy, in "packed" format.
Definition: pack_rows.cc:228
Performance schema instrumentation interface.
Definition: table.h:1421
MY_BITMAP read_set_internal
A bitmap of fields that are explicitly referenced by the query.
Definition: table.h:1739
A class that represents a field, which also holds a cached value of the field's data type.
Definition: pack_rows.h:61
const enum_field_types field_type
Definition: pack_rows.h:67
Field *const field
Definition: pack_rows.h:63
Column(Field *field)
Definition: pack_rows.cc:38
This struct is primarily used for holding the extracted columns in a hash join or BKA join,...
Definition: pack_rows.h:74
Table(TABLE *table_arg)
Definition: pack_rows.cc:43
bool copy_null_flags
Definition: pack_rows.h:80
bool store_contents_of_null_rows
Definition: pack_rows.h:87
Prealloced_array< Column, 8 > columns
Definition: pack_rows.h:77
TABLE * table
Definition: pack_rows.h:76
#define PSI_NOT_INSTRUMENTED
Definition: validate_password_imp.cc:44