MySQL 8.0.32
Source Code Documentation
pack_rows.h
Go to the documentation of this file.
1#ifndef SQL_PACK_ROWS_H_
2#define SQL_PACK_ROWS_H_
3
4/* Copyright (c) 2020, 2022, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 @file
28
29 Generic routines for packing rows (possibly from multiple tables
30 at the same time) into strings, and then back again. Used for (at least)
31 hash join, BKA, and streaming aggregation.
32 */
33
34#include <assert.h>
35#include <stddef.h>
36#include <string.h>
37
38#include "field_types.h"
39#include "my_bitmap.h"
40#include "my_compiler.h"
41
42#include "my_inttypes.h"
43#include "my_table_map.h"
45#include "prealloced_array.h"
46#include "sql/field.h"
47#include "sql/handler.h"
48#include "sql/table.h"
49#include "template_utils.h"
50
51class JOIN;
52class String;
53
54// Names such as “Column” and “Table” are a tad too generic for the global
55// namespace.
56namespace pack_rows {
57
58/// A class that represents a field, which also holds a cached value of the
59/// field's data type.
60struct Column {
   // Wraps the given Field. The constructor is only declared here; the symbol
   // index of this listing places its definition in pack_rows.cc.
61 explicit Column(Field *field);
   // The wrapped field. The pointer itself is const, so a Column keeps
   // referring to the same Field object for its whole lifetime.
62 Field *const field;
63
64 // The field type is used frequently, and caching it gains around 30% in some
65 // of our microbenchmarks.
   // NOTE(review): the member this comment describes (listing line 66) is
   // missing from this extract; the symbol index lists it as
   // `const enum_field_types field_type`.
67};
68
69/// This struct is primarily used for holding the extracted columns in a hash
70/// join. When the hash join iterator is constructed, we extract the columns
71/// that are needed to satisfy the SQL query.
72struct Table {
   // Builds the entry for `tab`. Only declared here; the symbol index of this
   // listing places the definition in pack_rows.cc.
73 explicit Table(TABLE *tab);
   // NOTE(review): listing lines 74-75 are missing from this extract. The
   // symbol index lists them as `TABLE *table` and
   // `Prealloced_array<Column, 8> columns`; both are read by
   // StoreFromTableBuffersRaw() further down.
76
77 // Whether to copy the NULL flags or not.
   // Defaults to false. When true, StoreFromTableBuffersRaw() copies
   // table->s->null_bytes bytes from table->null_flags into the packed row.
78 bool copy_null_flags{false};
79};
80
81/// A structure that contains a list of tables for the hash join operation,
82/// and some pre-computed properties for the tables.
   // NOTE(review): this is an incomplete extract of the class. The class
   // header (listing line 83) and several members (listing lines 87-88, 95,
   // 97, 103-104, 110, 114, 117 and 127) are missing; only what is visible is
   // documented here.
84 public:
   // Default construction. m_has_blob_column and m_store_rowids start out
   // false (see their initializers below).
85 TableCollection() = default;
86
89
90 // A single table (typically one for which there is no map bit).
91 explicit TableCollection(TABLE *table) { AddTable(table); }
92
   // Read-only access to the tables held by this collection.
93 const Prealloced_array<Table, 4> &tables() const { return m_tables; }
94
96
98
   // True if any of the tables has a BLOB/TEXT column; see the comment on
   // m_has_blob_column for why this matters.
99 bool has_blob_column() const { return m_has_blob_column; }
100
   // Whether row IDs are stored in packed rows. StoreFromTableBuffersRaw()
   // checks this before copying table->file->ref.
101 bool store_rowids() const { return m_store_rowids; }
102
   // NOTE(review): the brace below closes an accessor whose first lines
   // (listing lines 103-104) are missing from this extract.
105 }
106
107 private:
   // Used by the single-table constructor above. Only declared here; the
   // symbol index of this listing places the definition in pack_rows.cc.
108 void AddTable(TABLE *tab);
109
111
112 // We frequently use the bitmap to determine which side of the join an Item
113 // belongs to, so precomputing the bitmap saves quite some time.
115
116 // Sum of the NULL bytes and the row ID for all of the tables.
118
119 // Whether any of the tables has a BLOB/TEXT column. This is used to determine
120 // whether we need to estimate the row size every time we store a row to the
121 // row buffer or to a chunk file on disk. If this is set to false, we can
122 // pre-allocate any necessary buffers we need during the hash join, and thus
123 // eliminate the need for recalculating the row size every time.
124 bool m_has_blob_column = false;
125
   // Backing value for store_rowids().
126 bool m_store_rowids = false;
128};
129
130/// Count up how many bytes a single row from the given tables will occupy,
131/// in "packed" format. Note that this is an upper bound, so the length after
132/// calling Field::pack may very well be shorter than the size returned by this
133/// function.
134///
135/// The value returned from this function will sum up
136/// 1) The row-id if that is to be kept.
137/// 2) Size of the NULL flags.
138/// 3) Size of the buffer returned by pack() on all columns marked in the
139/// read_set.
140///
141/// Note that if any of the tables has a BLOB/TEXT column, this function looks
142/// at the data stored in the record buffers. This means that the function
143/// cannot be called before reading any rows if tables.has_blob_column() is true.
144size_t ComputeRowSizeUpperBound(const TableCollection &tables);
145
146/// Take the data marked for reading in "tables" and store it in the provided
147/// buffer. What data to store is determined by the read set of each table.
148/// Note that any existing data in "buffer" will be overwritten.
149///
150/// The output buffer will contain three things:
151///
152/// 1) NULL flags for each nullable column.
153/// 2) The row ID for each row. This is only stored if QEP_TAB::rowid_status !=
154/// NO_ROWID_NEEDED.
155/// 3) The actual data from the columns.
156///
157/// @retval true if error, false otherwise
159
160/// Take the data in "ptr" and put it back to the tables' record buffers.
161/// The tables must be _exactly_ the same as when the row was created.
162/// That is, it must contain the same tables in the same order, and the read set
163/// of each table must be identical when storing and restoring the row.
164/// If that's not the case, you will end up with undefined and unpredictable
165/// behavior.
166///
167/// Returns a pointer to where we ended reading.
168const uchar *LoadIntoTableBuffers(const TableCollection &tables,
169 const uchar *ptr);
170
171/// For each of the given tables, request that the row ID is filled in
172/// (the equivalent of calling file->position()) if needed.
173///
174/// @param tables All tables involved in the hash join.
175/// @param tables_to_get_rowid_for A bitmap of which tables to actually
176/// get row IDs for. (A table needs to be in both sets to be processed.)
178 table_map tables_to_get_rowid_for);
179
181 table_map tables_to_get_rowid_for);
182
/// Tells whether the row ID of `table` may be copied into a packed row.
///
/// @param table the table whose row ID is about to be copied
/// @returns false if the table is a const table, or if it is nullable and its
///   current row is NULL-complemented (table->null_row is set); true otherwise.
183static bool ShouldCopyRowId(const TABLE *table) {
184 // It is not safe to copy the row ID if we have a NULL-complemented row; the
185 // value is undefined, or the buffer location can even be nullptr.
186 return !table->const_table && !(table->is_nullable() && table->null_row);
187}
188
190 const TableCollection &tables, uchar *dptr) {
    // Writes the current row of every table in `tables` to the memory at
    // `dptr` and returns the position just past the last byte written.
    //
    // Per table, in this order:
    //   1) the NULL flags (only if Table::copy_null_flags is set),
    //   2) the table's null_row flag (only if the table is nullable),
    //   3) the row ID (only if tables.store_rowids() and ShouldCopyRowId()),
    //   4) every non-NULL column, in packed format.
    //
    // Nothing in this function checks how much room is left behind `dptr`.
    // NOTE(review): presumably the caller sizes the buffer beforehand, e.g.
    // with ComputeRowSizeUpperBound() -- confirm against the callers.
    // NOTE(review): the first line of the signature (listing line 189) is
    // missing from this extract.
191 for (const Table &tbl : tables.tables()) {
192 const TABLE *table = tbl.table;
193
194 // Store the NULL flags.
195 if (tbl.copy_null_flags) {
196 memcpy(dptr, table->null_flags, table->s->null_bytes);
197 dptr += table->s->null_bytes;
198 }
199
    // For a nullable table, also store the null_row flag itself, as
    // sizeof(null_row) raw bytes.
200 if (tbl.table->is_nullable()) {
201 const size_t null_row_size = sizeof(tbl.table->null_row);
202 memcpy(dptr, pointer_cast<const uchar *>(&tbl.table->null_row),
203 null_row_size);
204 dptr += null_row_size;
205 }
206
207 if (tables.store_rowids() && ShouldCopyRowId(tbl.table)) {
208 // Store the row ID, since it is needed by weedout.
209 memcpy(dptr, table->file->ref, table->file->ref_length);
210 dptr += table->file->ref_length;
211 }
212
213 for (const Column &column : tbl.columns) {
    // Every extracted column is expected to be part of its table's read set.
214 assert(bitmap_is_set(column.field->table->read_set,
215 column.field->field_index()));
    // A field that is NULL adds no bytes to the packed row.
216 if (!column.field->is_null()) {
217 // Store the data in packed format. The packed format will also
218 // include the length of the data if needed.
219 dptr = column.field->pack(dptr);
220 }
221 }
222 }
223 return dptr;
224}
225
226} // namespace pack_rows
227
228#endif // SQL_PACK_ROWS_H_
Definition: field.h:574
TABLE * table
Pointer to TABLE object that owns this field.
Definition: field.h:680
uint16 field_index() const
Returns field index.
Definition: field.h:1800
bool is_null(ptrdiff_t row_offset=0) const
Check whether the full table's row is NULL or the Field has value NULL.
Definition: field.h:1214
virtual uchar * pack(uchar *to, const uchar *from, size_t max_length) const
Pack the field into a format suitable for storage and transfer.
Definition: field.cc:1870
Definition: sql_optimizer.h:125
A typesafe replacement for DYNAMIC_ARRAY.
Definition: prealloced_array.h:70
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:166
uchar * ref
Pointer to current row.
Definition: handler.h:4370
uint ref_length
Length of ref (1-8 or the clustered key length)
Definition: handler.h:4438
A structure that contains a list of tables for the hash join operation, and some pre-computed properties for the tables.
Definition: pack_rows.h:83
Prealloced_array< Table, 4 > m_tables
Definition: pack_rows.h:110
table_map tables_to_get_rowid_for() const
Definition: pack_rows.h:103
bool has_blob_column() const
Definition: pack_rows.h:99
bool store_rowids() const
Definition: pack_rows.h:101
bool m_store_rowids
Definition: pack_rows.h:126
TableCollection(TABLE *table)
Definition: pack_rows.h:91
void AddTable(TABLE *tab)
Definition: pack_rows.cc:71
size_t ref_and_null_bytes_size() const
Definition: pack_rows.h:97
table_map m_tables_bitmap
Definition: pack_rows.h:114
table_map m_tables_to_get_rowid_for
Definition: pack_rows.h:127
size_t m_ref_and_null_bytes_size
Definition: pack_rows.h:117
table_map tables_bitmap() const
Definition: pack_rows.h:95
const Prealloced_array< Table, 4 > & tables() const
Definition: pack_rows.h:93
bool m_has_blob_column
Definition: pack_rows.h:124
This file contains the field type.
enum_field_types
Column types for MySQL.
Definition: field_types.h:52
static bool bitmap_is_set(const MY_BITMAP *map, uint bit)
Definition: my_bitmap.h:94
Header for compiler-dependent features.
#define ALWAYS_INLINE
Definition: my_compiler.h:109
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:51
uint64_t table_map
Definition: my_table_map.h:29
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:418
Definition: pack_rows.cc:35
void PrepareForRequestRowId(const Prealloced_array< Table, 4 > &tables, table_map tables_to_get_rowid_for)
Definition: pack_rows.cc:275
bool StoreFromTableBuffers(const TableCollection &tables, String *buffer)
Take the data marked for reading in "tables" and store it in the provided buffer.
Definition: pack_rows.cc:202
size_t ComputeRowSizeUpperBound(const TableCollection &tables)
Count up how many bytes a single row from the given tables will occupy, in "packed" format.
Definition: pack_rows.cc:186
static bool ShouldCopyRowId(const TABLE *table)
Definition: pack_rows.h:183
static ALWAYS_INLINE uchar * StoreFromTableBuffersRaw(const TableCollection &tables, uchar *dptr)
Definition: pack_rows.h:189
const uchar * LoadIntoTableBuffers(const TableCollection &tables, const uchar *ptr)
Take the data in "ptr" and put it back to the tables' record buffers.
Definition: pack_rows.cc:228
void RequestRowId(const Prealloced_array< Table, 4 > &tables, table_map tables_to_get_rowid_for)
For each of the given tables, request that the row ID is filled in (the equivalent of calling file->position()) if needed.
Definition: pack_rows.cc:264
Performance schema instrumentation interface.
uint null_bytes
Definition: table.h:836
Definition: table.h:1395
uchar * null_flags
Pointer to the null flags of record[0].
Definition: table.h:1579
bool null_row
Definition: table.h:1703
bool const_table
Definition: table.h:1725
MY_BITMAP * read_set
Set over all columns that the optimizer intends to read.
Definition: table.h:1621
bool is_nullable() const
Return whether table is nullable.
Definition: table.h:1971
handler * file
Definition: table.h:1397
TABLE_SHARE * s
Definition: table.h:1396
A class that represents a field, which also holds a cached value of the field's data type.
Definition: pack_rows.h:60
const enum_field_types field_type
Definition: pack_rows.h:66
Field *const field
Definition: pack_rows.h:62
Column(Field *field)
Definition: pack_rows.cc:37
This struct is primarily used for holding the extracted columns in a hash join.
Definition: pack_rows.h:72
Table(TABLE *tab)
Definition: pack_rows.cc:41
bool copy_null_flags
Definition: pack_rows.h:78
Prealloced_array< Column, 8 > columns
Definition: pack_rows.h:75
TABLE * table
Definition: pack_rows.h:74
#define PSI_NOT_INSTRUMENTED
Definition: validate_password_imp.cc:39