MySQL 9.5.0
Source Code Documentation
ddl0bulk.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2022, 2025, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ddl0bulk.h
29BULK Data Load. Currently treated like DDL */
30
31#pragma once
32
33#include <list>
34#include "btr0mtib.h"
35#include "row0mysql.h"
36#include "sql/handler.h"
37
38namespace ddl_bulk {
39
40class Loader {
41 public:
42 using Blob_context = void *;
43 using byte = unsigned char;
44
46 public:
47 /** Initialize thread specific data.
48 @param[in] prebuilt prebuilt structures from innodb table handler */
49 void init(const row_prebuilt_t *prebuilt);
50
51 /** Load rows to a sub-tree for a specific thread.
52 @param[in] prebuilt prebuilt structures from innodb table handler
53 @param[in,out] sub_tree sub tree to load data to
54 @param[in] rows rows to be loaded to the cluster index sub-tree
55 @param[in] wait_cbk Stat callbacks
56 @return innodb error code */
57 dberr_t load(const row_prebuilt_t *prebuilt,
58 Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows,
60
61 public:
62 /** Create a blob.
63 @param[in] sub_tree sub tree to load data to
64 @param[out] blob_ctx pointer to an opaque object representing a blob.
65 @param[out] ref blob reference to be placed in the record.
66 @return DB_SUCCESS on success or a failure error code. */
68 lob::ref_t &ref) {
69 return sub_tree->open_blob(blob_ctx, ref);
70 }
71
72 /** Write data into the blob.
73 @param[in] sub_tree sub tree to load data to
74 @param[in] blob_ctx pointer to blob into which data is written.
75 @param[out] ref blob reference to be placed in the record.
76 @param[in] data buffer containing data to be written
77 @param[in] len length of the data to be written.
78 @return DB_SUCCESS on success or a failure error code. */
80 lob::ref_t &ref, const byte *data, size_t len) {
81 return sub_tree->write_blob(blob_ctx, ref, data, len);
82 }
83
84 /** Indicate that the blob has been completed, so that resources can be
85 removed, and as necessary flushing can be done.
86 @param[in] sub_tree sub tree to load data to
87 @param[in] blob_ctx pointer to blob which has been completely written.
88 @param[out] ref blob reference to be placed in the record.
89 @return DB_SUCCESS on success or a failure error code. */
91 lob::ref_t &ref) {
92 return sub_tree->close_blob(blob_ctx, ref);
93 }
94
95 public:
96 /** Free thread specific data. */
97 void free();
98
99 dberr_t get_error() const { return m_err; }
100 std::string get_error_string() const { return m_sout.str(); }
101
102 /** Get the client error code (eg. ER_LOAD_BULK_DATA_UNSORTED).
103 @return the client error code. */
104 int get_error_code() const { return m_errcode; }
105
106 /** Add given subtree to the list of subtrees.
107 @param[in] subtree the subtree to be added. */
109 m_list_subtrees.push_back(subtree);
110 }
111
112 /** Get the last subtree created by this thread. */
114
115 /** Flush queue size used by the Bulk_flusher */
117
118 /** Each subtree needs to have a disjoint set of keys. In the case of
119 generated DB_ROW_ID as PK, each thread can build one subtree for one range
120 of row ids. */
121 std::list<Btree_multi::Btree_load *> m_list_subtrees;
122
123 /** The last DB_ROW_ID used by this thread. */
124 uint64_t m_last_rowid{0};
125
126 private:
127 /** Fill system columns for index entry to be loaded.
128 @param[in] prebuilt prebuilt structures from innodb table handler */
129 void fill_system_columns(const row_prebuilt_t *prebuilt);
130
131 /** Fill the tuple to set the column data
132 @param[in] prebuilt prebuilt structures from innodb table handler
133 @param[in] rows sql rows with column data
134 @param[in] row_index current row index
135 @param[in] gcol_heap memory heap used for generated columns
136 @param[in,out] gcol_blobs_flushed true if blobs are flushed, false
137 otherwise. This is needed only when we have gcol on blobs.
138 @return innodb error code. */
139 dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows,
140 size_t row_index, mem_heap_t *gcol_heap,
141 bool &gcol_blobs_flushed);
142
143 dberr_t setup_dfield(const row_prebuilt_t *prebuilt, Field *field,
144 const Column_mysql &sql_col, dfield_t *src_dfield,
145 dfield_t *dst_dfield);
146
147 /** Fill he cluster index entry from tuple data.
148 @param[in] prebuilt prebuilt structures from innodb table handler */
149 void fill_index_entry(const row_prebuilt_t *prebuilt);
150
151 /** Store integer column in Innodb format.
152 @param[in] col sql column data
153 @param[in,out] data_ptr data buffer for storing converted data
154 @param[in,out] data_len data buffer length
155 @return true if successful. */
156 bool store_int_col(const Column_mysql &col, byte *data_ptr,
157 size_t &data_len);
158
159 private:
160 /** Heap for allocating tuple memory. */
162
163 /** Tuple for converting input data to table row. */
165
166 /** Tuple for inserting row to cluster index. */
168
169 /** Column data for system column transaction ID. */
171
172 /** Column data for system column Roll pointer. */
174
175 /** Column data for system column DATA_ROW_ID. */
177
178 /** Error code at thread level. */
180
181 int m_errcode{0};
182
184
186 };
187
188 /** Loader context constructor.
189 @param[in] num_threads Number of threads to use for bulk loading
190 @param[in] keynr index number
191 @param[in] trx transaction context. */
192 Loader(size_t num_threads, size_t keynr, const trx_t *trx)
193 : m_num_threads(num_threads), m_keynr(keynr), m_trx(trx) {}
194
195 /** Prepare bulk loading by multiple threads.
196 @param[in] prebuilt prebuilt structures from innodb table handler
197 @param[in] data_size total data size to load in bytes
198 @param[in] memory memory to be used from buffer pool
199 @return innodb error code */
200 dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size,
201 size_t memory);
202
203 /** Load rows to a sub-tree by a thread. Called concurrently by multiple
204 execution threads.
205 @param[in] prebuilt prebuilt structures from innodb table handler
206 @param[in] thread_index identifies the thread and the B-tree to use.
207 @param[in] rows rows to be loaded to the cluster index sub-tree
208 @param[in] wait_cbk Stat callbacks
209 @return innodb error code */
210 dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index,
211 const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk);
212
213 size_t get_keynr() const { return m_keynr; }
214
215 public:
216 /** Open a blob.
217 @param[in] thread_index identifies the thread and the B-tree to use.
218 @param[out] blob_ctx pointer to an opaque object representing a blob.
219 @param[out] ref blob reference to be placed in the record.
220 @return DB_SUCCESS on success or a failure error code. */
221 dberr_t open_blob(size_t thread_index, Blob_context &blob_ctx,
222 lob::ref_t &ref);
223
224 /** Write data into the blob.
225 @param[in] thread_index identifies the thread and the B-tree to use.
226 @param[in] blob_ctx pointer to blob into which data is written.
227 @param[out] ref blob reference to be placed in the record.
228 @param[in] data buffer containing data to be written
229 @param[in] len length of the data to be written.
230 @return DB_SUCCESS on success or a failure error code. */
231 dberr_t write_blob(size_t thread_index, Blob_context blob_ctx,
232 lob::ref_t &ref, const byte *data, size_t len);
233
234 /** Indicate that the blob has been completed, so that resources can be
235 removed, and as necessary flushing can be done.
236 @param[in] thread_index identifies the thread and the B-tree to use.
237 @param[in] blob_ctx pointer to blob which has been completely written.
238 @param[out] ref blob reference to be placed in the record.
239 @return DB_SUCCESS on success or a failure error code. */
240 dberr_t close_blob(size_t thread_index, Blob_context blob_ctx,
241 lob::ref_t &ref);
242
243 /** Finish bulk load operation, combining the sub-trees produced by
244 concurrent threads.
245 @param[in] is_error true if called for cleanup and rollback after an error
246 @return innodb error code */
247 dberr_t end(bool is_error);
248
251 using Thread_ctxs = std::vector<Thread_data, ut::allocator<Thread_data>>;
252
253 dberr_t get_error() const;
254 std::string get_error_string() const;
255
256 /** Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
257 @return the client error code. */
258 int get_error_code() const;
259
260 /** @return table name where the data is being loaded. */
261 const char *get_table_name() const { return m_table->name.m_name; }
262
263 /** @return index name where the data is being loaded. */
264 const char *get_index_name() const { return m_index->name(); }
265
266 private:
267 /** Ensure that dict_sys->row_id is greater than max rowid used in bulk
268 load of this table.
269 @param[in] max_rowid max rowid used in this table. */
270 void set_sys_max_rowid(uint64_t max_rowid);
271
272 /** Merge the sub-trees to build the cluster index.
273 @return innodb error code. */
275
276 /** Calculate the flush queue size to be used based on the available memory.
277 @param[in] memory total buffer pool memory to use
278 @param[out] flush_queue_size calculated queue size
279 @param[out] allocate_in_pages true if need to allocate in pages
280 false if need to allocate in extents */
281 void get_queue_size(size_t memory, size_t &flush_queue_size,
282 bool &allocate_in_pages) const;
283
284 private:
285 /** Number of threads for bulk loading. */
286 const size_t m_num_threads{};
287
288 const size_t m_keynr{};
289
290 /** All thread specific data. */
292
293 /** Sub-tree loading contexts. */
295
296 /** Innodb dictionary table object. */
298
299 /** Index being loaded. Could be primary or secondary index. */
301
302 /** Allocator to extend tablespace and allocate extents. */
304
305 const trx_t *const m_trx{};
306
307 /** Flush queue size used by the Bulk_flusher */
309
310 std::mutex m_gcol_mutex;
311};
312
313inline std::string Loader::get_error_string() const {
314 std::string error;
315 for (auto &thr : m_ctxs) {
316 if (thr.get_error() != DB_SUCCESS) {
317 error = thr.get_error_string();
318 break;
319 }
320 }
321 return error;
322}
323
324inline int Loader::get_error_code() const {
325 int errcode = 0;
326 for (auto &thr : m_ctxs) {
327 errcode = thr.get_error_code();
328 if (errcode != 0) {
329 break;
330 }
331 }
332 return errcode;
333}
334
337 for (auto &thr : m_ctxs) {
338 e = thr.get_error();
339 if (e != DB_SUCCESS) {
340 break;
341 }
342 }
343 return e;
344}
345
346} // namespace ddl_bulk
Multi Threaded Index Build (MTIB) using BUF_BLOCK_MEMORY and dedicated Bulk_flusher threads.
void * Blob_context
Definition: bulk_data_service.h:50
Definition: btr0mtib.h:895
dberr_t open_blob(Blob_context &blob_ctx, lob::ref_t &ref)
Create a blob.
Definition: btr0mtib.h:908
dberr_t close_blob(Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushing can be done.
Definition: btr0mtib.h:928
dberr_t write_blob(Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: btr0mtib.h:918
Definition: btr0mtib.h:479
Definition: field.h:570
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:302
Definition: ddl0bulk.h:45
std::list< Btree_multi::Btree_load * > m_list_subtrees
Each subtree needs to have a disjoint set of keys.
Definition: ddl0bulk.h:121
std::ostringstream m_sout
Definition: ddl0bulk.h:183
Btree_multi::Btree_load * get_subtree()
Get the last subtree created by this thread.
Definition: ddl0bulk.h:113
void free()
Free thread specific data.
Definition: ddl0bulk.cc:329
dberr_t m_err
Error code at thread level.
Definition: ddl0bulk.h:179
dberr_t write_blob(Btree_multi::Btree_load *sub_tree, Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: ddl0bulk.h:79
dberr_t open_blob(Btree_multi::Btree_load *sub_tree, Blob_context &blob_ctx, lob::ref_t &ref)
Create a blob.
Definition: ddl0bulk.h:67
std::string get_error_string() const
Definition: ddl0bulk.h:100
dtuple_t * m_entry
Tuple for inserting row to cluster index.
Definition: ddl0bulk.h:167
dtuple_t * m_row
Tuple for converting input data to table row.
Definition: ddl0bulk.h:164
unsigned char m_rowid_data[DATA_ROW_ID_LEN]
Column data for system column DATA_ROW_ID.
Definition: ddl0bulk.h:176
int get_error_code() const
Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
Definition: ddl0bulk.h:104
mem_heap_t * m_heap
Heap for allocating tuple memory.
Definition: ddl0bulk.h:161
size_t m_nth_index
Definition: ddl0bulk.h:185
dberr_t close_blob(Btree_multi::Btree_load *sub_tree, Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushing can be done.
Definition: ddl0bulk.h:90
dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows, size_t row_index, mem_heap_t *gcol_heap, bool &gcol_blobs_flushed)
Fill the tuple to set the column data.
Definition: ddl0bulk.cc:564
int m_errcode
Definition: ddl0bulk.h:181
unsigned char m_rollptr_data[DATA_ROLL_PTR_LEN]
Column data for system column Roll pointer.
Definition: ddl0bulk.h:173
dberr_t setup_dfield(const row_prebuilt_t *prebuilt, Field *field, const Column_mysql &sql_col, dfield_t *src_dfield, dfield_t *dst_dfield)
Definition: ddl0bulk.cc:467
void add_subtree(Btree_multi::Btree_load *subtree)
Add given subtree to the list of subtrees.
Definition: ddl0bulk.h:108
dberr_t load(const row_prebuilt_t *prebuilt, Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree for a specific thread.
Definition: ddl0bulk.cc:190
uint64_t m_last_rowid
The last DB_ROW_ID used by this thread.
Definition: ddl0bulk.h:124
void init(const row_prebuilt_t *prebuilt)
Initialize thread specific data.
Definition: ddl0bulk.cc:48
dberr_t get_error() const
Definition: ddl0bulk.h:99
size_t m_queue_size
Flush queue size used by the Bulk_flusher.
Definition: ddl0bulk.h:116
void fill_index_entry(const row_prebuilt_t *prebuilt)
Fill the cluster index entry from tuple data.
Definition: ddl0bulk.cc:425
void fill_system_columns(const row_prebuilt_t *prebuilt)
Fill system columns for index entry to be loaded.
Definition: ddl0bulk.cc:403
bool store_int_col(const Column_mysql &col, byte *data_ptr, size_t &data_len)
Store integer column in Innodb format.
Definition: ddl0bulk.cc:835
unsigned char m_trx_data[DATA_TRX_ID_LEN]
Column data for system column transaction ID.
Definition: ddl0bulk.h:170
Definition: ddl0bulk.h:40
std::mutex m_gcol_mutex
Definition: ddl0bulk.h:310
std::vector< Btree_multi::Btree_load *, ut::allocator< Btree_multi::Btree_load * > > Btree_loads
Definition: ddl0bulk.h:250
Btree_loads m_sub_tree_loads
Sub-tree loading contexts.
Definition: ddl0bulk.h:294
void get_queue_size(size_t memory, size_t &flush_queue_size, bool &allocate_in_pages) const
Calculate the flush queue size to be used based on the available memory.
Definition: ddl0bulk.cc:85
Loader(size_t num_threads, size_t keynr, const trx_t *trx)
Loader context constructor.
Definition: ddl0bulk.h:192
dict_index_t * m_index
Index being loaded.
Definition: ddl0bulk.h:300
const char * get_index_name() const
Definition: ddl0bulk.h:264
const trx_t *const m_trx
Definition: ddl0bulk.h:305
dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size, size_t memory)
Prepare bulk loading by multiple threads.
Definition: ddl0bulk.cc:109
Btree_multi::Bulk_extent_allocator m_extent_allocator
Allocator to extend tablespace and allocate extents.
Definition: ddl0bulk.h:303
dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree by a thread.
Definition: ddl0bulk.cc:147
const char * get_table_name() const
Definition: ddl0bulk.h:261
int get_error_code() const
Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
Definition: ddl0bulk.h:324
size_t m_queue_size
Flush queue size used by the Bulk_flusher.
Definition: ddl0bulk.h:308
size_t get_keynr() const
Definition: ddl0bulk.h:213
dict_table_t * m_table
Innodb dictionary table object.
Definition: ddl0bulk.h:297
dberr_t merge_subtrees()
Merge the sub-trees to build the cluster index.
Definition: ddl0bulk.cc:899
dberr_t end(bool is_error)
Finish bulk load operation, combining the sub-trees produced by concurrent threads.
Definition: ddl0bulk.cc:336
void * Blob_context
Definition: ddl0bulk.h:42
dberr_t get_error() const
Definition: ddl0bulk.h:335
std::string get_error_string() const
Definition: ddl0bulk.h:313
dberr_t close_blob(size_t thread_index, Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushing can be done.
Definition: ddl0bulk.cc:181
void set_sys_max_rowid(uint64_t max_rowid)
Ensure that dict_sys->row_id is greater than max rowid used in bulk load of this table.
Definition: ddl0bulk.cc:392
dberr_t write_blob(size_t thread_index, Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: ddl0bulk.cc:172
std::vector< Thread_data, ut::allocator< Thread_data > > Thread_ctxs
Definition: ddl0bulk.h:251
Thread_ctxs m_ctxs
All thread specific data.
Definition: ddl0bulk.h:291
dberr_t open_blob(size_t thread_index, Blob_context &blob_ctx, lob::ref_t &ref)
Open a blob.
Definition: ddl0bulk.cc:163
const size_t m_num_threads
Number of threads for bulk loading.
Definition: ddl0bulk.h:286
const size_t m_keynr
Definition: ddl0bulk.h:288
Allocator that allows std::* containers to manage their memory through ut::malloc* and ut::free libra...
Definition: ut0new.h:2183
constexpr size_t DATA_ROLL_PTR_LEN
Rollback data pointer type size in bytes.
Definition: data0type.h:191
constexpr size_t DATA_TRX_ID_LEN
Transaction ID type size in bytes.
Definition: data0type.h:185
constexpr uint32_t DATA_ROW_ID_LEN
stored length for row id
Definition: data0type.h:179
dberr_t
Definition: db0err.h:39
@ DB_SUCCESS
Definition: db0err.h:43
void error(const char *format,...)
PT & ref(PT *tp)
Definition: tablespace_impl.cc:359
Definition: ddl0bulk.cc:46
Definition: aligned_atomic.h:44
ValueType max(X &&first)
Definition: gtid.h:103
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2876
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2880
Interface between Innobase row operations and MySQL.
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:860
Definition: bulk_data_service.h:205
Structure for an SQL data field.
Definition: data0data.h:617
Data structure for an index.
Definition: dict0mem.h:1041
id_name_t name
index name
Definition: dict0mem.h:1049
Data structure for a database table.
Definition: dict0mem.h:1922
table_name_t name
Table name.
Definition: dict0mem.h:1997
Structure for an SQL data tuple of fields (logical record)
Definition: data0data.h:696
The struct 'lob::ref_t' represents an external field reference.
Definition: lob0lob.h:198
The info structure stored at the beginning of a heap block.
Definition: mem0mem.h:302
A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; th...
Definition: row0mysql.h:515
char * m_name
The name in internal representation.
Definition: dict0mem.h:467
Definition: trx0trx.h:675