MySQL  8.0.27
Source Code Documentation
Go to the documentation of this file.
1 #ifndef SQL_PACK_ROWS_H_
2 #define SQL_PACK_ROWS_H_
4 /* Copyright (c) 2020, 2021, Oracle and/or its affiliates.
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License, version 2.0,
8  as published by the Free Software Foundation.
10  This program is also distributed with certain software (including
11  but not limited to OpenSSL) that is licensed under separate terms,
12  as designated in a particular file or component or in included license
13  documentation. The authors of MySQL hereby grant you an additional
14  permission to link the program and your derivative works with the
15  separately licensed software that they have included with MySQL.
17  This program is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License, version 2.0, for more details.
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26 /**
27  @file
29  Generic routines for packing rows (possibly from multiple tables
30  at the same time) into strings, and then back again. Used for (at least)
31  hash join, BKA, and streaming aggregation.
32  */
34 #include <assert.h>
35 #include <stddef.h>
36 #include <string.h>
38 #include "field_types.h"
39 #include "my_bitmap.h"
40 #include "my_compiler.h"
42 #include "my_inttypes.h"
43 #include "my_table_map.h"
45 #include "prealloced_array.h"
46 #include "sql/field.h"
47 #include "sql/handler.h"
48 #include "sql/table.h"
49 #include "template_utils.h"
51 class JOIN;
52 class String;
54 // Names such as “Column” and “Table” are a tad too generic for the global
55 // namespace.
56 namespace pack_rows {
58 /// A class that represents a field, which also holds a cached value of the
59 /// field's data type.
60 struct Column {
61  explicit Column(Field *field);
62  Field *const field;
64  // The field type is used frequently, and caching it gains around 30% in some
65  // of our microbenchmarks.
67 };
69 /// This struct is primarily used for holding the extracted columns in a hash
70 /// join. When the hash join iterator is constructed, we extract the columns
71 /// that are needed to satisfy the SQL query.
72 struct Table {
73  explicit Table(TABLE *tab);
77  // Whether to copy the NULL flags or not.
78  bool copy_null_flags{false};
79 };
81 /// A structure that contains a list of tables for the hash join operation,
82 /// and some pre-computed properties for the tables.
84  public:
85  TableCollection() = default;
90  // A single table (typically one for which there is no map bit).
91  explicit TableCollection(TABLE *table) { AddTable(table); }
93  const Prealloced_array<Table, 4> &tables() const { return m_tables; }
99  bool has_blob_column() const { return m_has_blob_column; }
101  bool store_rowids() const { return m_store_rowids; }
105  }
107  private:
108  void AddTable(TABLE *tab);
112  // We frequently use the bitmap to determine which side of the join an Item
113  // belongs to, so precomputing the bitmap saves quite some time.
116  // Sum of the NULL bytes and the row ID for all of the tables.
119  // Whether any of the tables has a BLOB/TEXT column. This is used to determine
120  // whether we need to estimate the row size every time we store a row to the
121  // row buffer or to a chunk file on disk. If this is set to false, we can
122  // pre-allocate any necessary buffers we need during the hash join, and thus
123  // eliminate the need for recalculating the row size every time.
124  bool m_has_blob_column = false;
126  bool m_store_rowids = false;
128 };
130 /// Count up how many bytes a single row from the given tables will occupy,
131 /// in "packed" format. Note that this is an upper bound, so the length after
132 /// calling Field::pack may very well be shorter than the size returned by this
133 /// function.
134 ///
135 /// The value returned from this function will sum up
136 /// 1) The row-id if that is to be kept.
137 /// 2) Size of the NULL flags.
138 /// 3) Size of the buffer returned by pack() on all columns marked in the
139 /// read_set.
140 ///
141 /// Note that if any of the tables has a BLOB/TEXT column, this function looks
142 /// at the data stored in the record buffers. This means that the function can
143 /// not be called before reading any rows if tables.has_blob_column is true.
144 size_t ComputeRowSizeUpperBound(const TableCollection &tables);
146 /// Take the data marked for reading in "tables" and store it in the provided
147 /// buffer. What data to store is determined by the read set of each table.
148 /// Note that any existing data in "buffer" will be overwritten.
149 ///
150 /// The output buffer will contain three things:
151 ///
152 /// 1) NULL flags for each nullable column.
153 /// 2) The row ID for each row. This is only stored if QEP_TAB::rowid_status !=
154 /// NO_ROWID_NEEDED.
155 /// 3) The actual data from the columns.
156 ///
157 /// @retval true if error, false otherwise
158 bool StoreFromTableBuffers(const TableCollection &tables, String *buffer);
160 /// Take the data in "ptr" and put it back to the tables' record buffers.
161 /// The tables must be _exactly_ the same as when the row was created.
162 /// That is, it must contain the same tables in the same order, and the read set
163 /// of each table must be identical when storing and restoring the row.
164 /// If that's not the case, you will end up with undefined and unpredictable
165 /// behavior.
166 ///
167 /// Returns a pointer to where we ended reading.
168 const uchar *LoadIntoTableBuffers(const TableCollection &tables,
169  const uchar *ptr);
171 /// For each of the given tables, request that the row ID is filled in
172 /// (the equivalent of calling file->position()) if needed.
173 ///
174 /// @param tables All tables involved in the hash join.
175 /// @param tables_to_get_rowid_for A bitmap of which tables to actually
176 /// get row IDs for. (A table needs to be in both sets to be processed.)
178  table_map tables_to_get_rowid_for);
181  table_map tables_to_get_rowid_for);
183 static bool ShouldCopyRowId(const TABLE *table) {
184  // It is not safe to copy the row ID if we have a NULL-complemented row; the
185  // value is undefined, or the buffer location can even be nullptr.
186  return !table->const_table && !(table->is_nullable() && table->null_row);
187 }
190  const TableCollection &tables, uchar *dptr) {
191  for (const Table &tbl : tables.tables()) {
192  const TABLE *table = tbl.table;
194  // Store the NULL flags.
195  if (tbl.copy_null_flags) {
196  memcpy(dptr, table->null_flags, table->s->null_bytes);
197  dptr += table->s->null_bytes;
198  }
200  if (tbl.table->is_nullable()) {
201  const size_t null_row_size = sizeof(tbl.table->null_row);
202  memcpy(dptr, pointer_cast<const uchar *>(&tbl.table->null_row),
203  null_row_size);
204  dptr += null_row_size;
205  }
207  if (tables.store_rowids() && ShouldCopyRowId(tbl.table)) {
208  // Store the row ID, since it is needed by weedout.
209  memcpy(dptr, table->file->ref, table->file->ref_length);
210  dptr += table->file->ref_length;
211  }
213  for (const Column &column : tbl.columns) {
214  assert(bitmap_is_set(column.field->table->read_set,
215  column.field->field_index()));
216  if (!column.field->is_null()) {
217  // Store the data in packed format. The packed format will also
218  // include the length of the data if needed.
219  dptr = column.field->pack(dptr);
220  }
221  }
222  }
223  return dptr;
224 }
226 } // namespace pack_rows
228 #endif // SQL_PACK_ROWS_H_
Definition: field.h:590
TABLE * table
Pointer to TABLE object that owns this field.
Definition: field.h:696
uint16 field_index() const
Returns field index.
Definition: field.h:1812
bool is_null(ptrdiff_t row_offset=0) const
Check whether the full table's row is NULL or the Field has value NULL.
Definition: field.h:1230
virtual uchar * pack(uchar *to, const uchar *from, size_t max_length) const
Pack the field into a format suitable for storage and transfer.
Definition: sql_optimizer.h:125
A typesafe replacement for DYNAMIC_ARRAY.
Definition: prealloced_array.h:70
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:165
uchar * ref
Pointer to current row.
Definition: handler.h:4147
uint ref_length
Length of ref (1-8 or the clustered key length)
Definition: handler.h:4215
A structure that contains a list of tables for the hash join operation, and some pre-computed propert...
Definition: pack_rows.h:83
Prealloced_array< Table, 4 > m_tables
Definition: pack_rows.h:110
table_map tables_to_get_rowid_for() const
Definition: pack_rows.h:103
bool has_blob_column() const
Definition: pack_rows.h:99
bool store_rowids() const
Definition: pack_rows.h:101
bool m_store_rowids
Definition: pack_rows.h:126
TableCollection(TABLE *table)
Definition: pack_rows.h:91
void AddTable(TABLE *tab)
size_t ref_and_null_bytes_size() const
Definition: pack_rows.h:97
table_map m_tables_bitmap
Definition: pack_rows.h:114
table_map m_tables_to_get_rowid_for
Definition: pack_rows.h:127
size_t m_ref_and_null_bytes_size
Definition: pack_rows.h:117
table_map tables_bitmap() const
Definition: pack_rows.h:95
const Prealloced_array< Table, 4 > & tables() const
Definition: pack_rows.h:93
bool m_has_blob_column
Definition: pack_rows.h:124
This file contains the field type.
Column types for MySQL.
Definition: field_types.h:57
static bool bitmap_is_set(const MY_BITMAP *map, uint bit)
Definition: my_bitmap.h:90
Header for compiler-dependent features.
Definition: my_compiler.h:109
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:51
uint64_t table_map
Definition: my_table_map.h:29
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:391
static ALWAYS_INLINE uchar * StoreFromTableBuffersRaw(const TableCollection &tables, uchar *dptr)
Definition: pack_rows.h:189
void PrepareForRequestRowId(const Prealloced_array< Table, 4 > &tables, table_map tables_to_get_rowid_for)
bool StoreFromTableBuffers(const TableCollection &tables, String *buffer)
Take the data marked for reading in "tables" and store it in the provided buffer.
size_t ComputeRowSizeUpperBound(const TableCollection &tables)
Count up how many bytes a single row from the given tables will occupy, in "packed" format.
static bool ShouldCopyRowId(const TABLE *table)
Definition: pack_rows.h:183
const uchar * LoadIntoTableBuffers(const TableCollection &tables, const uchar *ptr)
Take the data in "ptr" and put it back to the tables' record buffers.
void RequestRowId(const Prealloced_array< Table, 4 > &tables, table_map tables_to_get_rowid_for)
For each of the given tables, request that the row ID is filled in (the equivalent of calling file->p...
Performance schema instrumentation interface.
uint null_bytes
Definition: table.h:836
Definition: table.h:1394
uchar * null_flags
Pointer to the null flags of record[0].
Definition: table.h:1507
bool null_row
Definition: table.h:1631
bool const_table
Definition: table.h:1653
MY_BITMAP * read_set
Set over all columns that the optimizer intends to read.
Definition: table.h:1549
bool is_nullable() const
Return whether table is nullable.
Definition: table.h:1901
handler * file
Definition: table.h:1396
Definition: table.h:1395
A class that represents a field, which also holds a cached value of the field's data type.
Definition: pack_rows.h:60
const enum_field_types field_type
Definition: pack_rows.h:66
Field *const field
Definition: pack_rows.h:62
Column(Field *field)
This struct is primarily used for holding the extracted columns in a hash join.
Definition: pack_rows.h:72
Table(TABLE *tab)
bool copy_null_flags
Definition: pack_rows.h:78
Prealloced_array< Column, 8 > columns
Definition: pack_rows.h:75
TABLE * table
Definition: pack_rows.h:74