MySQL 9.4.0
Source Code Documentation
ddl0bulk.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2022, 2025, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ddl0bulk.h
29BULK Data Load. Currently treated like DDL */
30
31#pragma once
32
33#include <list>
34#include "btr0mtib.h"
35#include "row0mysql.h"
36#include "sql/handler.h"
37
38namespace ddl_bulk {
39
40class Loader {
41 public:
42 using Blob_context = void *;
43 using byte = unsigned char;
44
46 public:
47 /** Initialize thread specific data.
48 @param[in] prebuilt prebuilt structures from innodb table handler */
49 void init(const row_prebuilt_t *prebuilt);
50
51 /** Load rows to a sub-tree for a specific thread.
52 @param[in] prebuilt prebuilt structures from innodb table handler
53 @param[in,out] sub_tree sub tree to load data to
54 @param[in] rows rows to be loaded to the cluster index sub-tree
55 @param[in] wait_cbk Stat callbacks
56 @return innodb error code */
57 dberr_t load(const row_prebuilt_t *prebuilt,
58 Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows,
60
61 public:
62 /** Create a blob.
63 @param[in] sub_tree sub tree to load data to
64 @param[out] blob_ctx pointer to an opaque object representing a blob.
65 @param[out] ref blob reference to be placed in the record.
66 @return DB_SUCCESS on success or a failure error code. */
68 lob::ref_t &ref) {
69 return sub_tree->open_blob(blob_ctx, ref);
70 }
71
72 /** Write data into the blob.
73 @param[in] sub_tree sub tree to load data to
74 @param[in] blob_ctx pointer to blob into which data is written.
75 @param[out] ref blob reference to be placed in the record.
76 @param[in] data buffer containing data to be written
77 @param[in] len length of the data to be written.
78 @return DB_SUCCESS on success or a failure error code. */
80 lob::ref_t &ref, const byte *data, size_t len) {
81 return sub_tree->write_blob(blob_ctx, ref, data, len);
82 }
83
84 /** Indicate that the blob has been completed, so that resources can be
85 removed, and as necessary flushing can be done.
86 @param[in] sub_tree sub tree to load data to
87 @param[in] blob_ctx pointer to blob which has been completely written.
88 @param[out] ref blob reference to be placed in the record.
89 @return DB_SUCCESS on success or a failure error code. */
91 lob::ref_t &ref) {
92 return sub_tree->close_blob(blob_ctx, ref);
93 }
94
95 public:
96 /** Free thread specific data. */
97 void free();
98
99 dberr_t get_error() const { return m_err; }
100 std::string get_error_string() const { return m_sout.str(); }
101
102 /** Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
103 @return the client error code. */
104 int get_error_code() const { return m_errcode; }
105
106 /** Add given subtree to the list of subtrees.
107 @param[in] subtree the subtree to be added. */
109 m_list_subtrees.push_back(subtree);
110 }
111
112 /** Get the last subtree created by this thread. */
114
115 /** Flush queue size used by the Bulk_flusher */
117
118 /** Each subtree needs to have a disjoint set of keys. In the case of
119 generated DB_ROW_ID as PK, each thread can build one subtree for one range
120 of row ids. */
121 std::list<Btree_multi::Btree_load *> m_list_subtrees;
122
123 /** The last DB_ROW_ID used by this thread. */
124 uint64_t m_last_rowid{0};
125
126 private:
127 /** Fill system columns for index entry to be loaded.
128 @param[in] prebuilt prebuilt structures from innodb table handler */
129 void fill_system_columns(const row_prebuilt_t *prebuilt);
130
131 /** Fill the tuple to set the column data
132 @param[in] prebuilt prebuilt structures from innodb table handler
133 @param[in] rows sql rows with column data
134 @param[in] row_index current row index
135 @return innodb error code. */
136 dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows,
137 size_t row_index);
138
139 /** Fill the cluster index entry from tuple data.
140 @param[in] prebuilt prebuilt structures from innodb table handler */
141 void fill_index_entry(const row_prebuilt_t *prebuilt);
142
143 /** Store integer column in Innodb format.
144 @param[in] col sql column data
145 @param[in,out] data_ptr data buffer for storing converted data
146 @param[in,out] data_len data buffer length
147 @return true if successful. */
148 bool store_int_col(const Column_mysql &col, byte *data_ptr,
149 size_t &data_len);
150
151 private:
152 /** Heap for allocating tuple memory. */
154
155 /** Tuple for converting input data to table row. */
157
158 /** Tuple for inserting row to cluster index. */
160
161 /** Column data for system column transaction ID. */
163
164 /** Column data for system column Roll pointer. */
166
167 /** Column data for system column DATA_ROW_ID. */
169
170 /** Error code at thread level. */
172
173 int m_errcode{0};
174
176
178 };
179
180 /** Loader context constructor.
181 @param[in] num_threads Number of threads to use for bulk loading
182 @param[in] keynr index number
183 @param[in] trx transaction context. */
184 Loader(size_t num_threads, size_t keynr, const trx_t *trx)
185 : m_num_threads(num_threads), m_keynr(keynr), m_trx(trx) {}
186
187 /** Prepare bulk loading by multiple threads.
188 @param[in] prebuilt prebuilt structures from innodb table handler
189 @param[in] data_size total data size to load in bytes
190 @param[in] memory memory to be used from buffer pool
191 @return innodb error code */
192 dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size,
193 size_t memory);
194
195 /** Load rows to a sub-tree by a thread. Called concurrently by multiple
196 execution threads.
197 @param[in] prebuilt prebuilt structures from innodb table handler
198 @param[in] thread_index identifies the thread and the B-tree to use.
199 @param[in] rows rows to be loaded to the cluster index sub-tree
200 @param[in] wait_cbk Stat callbacks
201 @return innodb error code */
202 dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index,
203 const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk);
204
205 size_t get_keynr() const { return m_keynr; }
206
207 public:
208 /** Open a blob.
209 @param[in] thread_index identifies the thread and the B-tree to use.
210 @param[out] blob_ctx pointer to an opaque object representing a blob.
211 @param[out] ref blob reference to be placed in the record.
212 @return DB_SUCCESS on success or a failure error code. */
213 dberr_t open_blob(size_t thread_index, Blob_context &blob_ctx,
214 lob::ref_t &ref);
215
216 /** Write data into the blob.
217 @param[in] thread_index identifies the thread and the B-tree to use.
218 @param[in] blob_ctx pointer to blob into which data is written.
219 @param[out] ref blob reference to be placed in the record.
220 @param[in] data buffer containing data to be written
221 @param[in] len length of the data to be written.
222 @return DB_SUCCESS on success or a failure error code. */
223 dberr_t write_blob(size_t thread_index, Blob_context blob_ctx,
224 lob::ref_t &ref, const byte *data, size_t len);
225
226 /** Indicate that the blob has been completed, so that resources can be
227 removed, and as necessary flushing can be done.
228 @param[in] thread_index identifies the thread and the B-tree to use.
229 @param[in] blob_ctx pointer to blob which has been completely written.
230 @param[out] ref blob reference to be placed in the record.
231 @return DB_SUCCESS on success or a failure error code. */
232 dberr_t close_blob(size_t thread_index, Blob_context blob_ctx,
233 lob::ref_t &ref);
234
235 /** Finish bulk load operation, combining the sub-trees produced by concurrent
236 threads.
237 @param[in] is_error true if called for cleanup and rollback after an error
238 @return innodb error code */
239 dberr_t end(bool is_error);
240
243 using Thread_ctxs = std::vector<Thread_data, ut::allocator<Thread_data>>;
244
245 dberr_t get_error() const;
246 std::string get_error_string() const;
247
248 /** Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
249 @return the client error code. */
250 int get_error_code() const;
251
252 /** @return table name where the data is being loaded. */
253 const char *get_table_name() const { return m_table->name.m_name; }
254
255 /** @return index name where the data is being loaded. */
256 const char *get_index_name() const { return m_index->name(); }
257
258 private:
259 /** Ensure that dict_sys->row_id is greater than max rowid used in bulk
260 load of this table.
261 @param[in] max_rowid max rowid used in this table. */
262 void set_sys_max_rowid(uint64_t max_rowid);
263
264 /** Merge the sub-trees to build the cluster index.
265 @return innodb error code. */
267
268 /** Calculate the flush queue size to be used based on the available memory.
269 @param[in] memory total buffer pool memory to use
270 @param[out] flush_queue_size calculated queue size
271 @param[out] allocate_in_pages true if need to allocate in pages
272 false if need to allocate in extents */
273 void get_queue_size(size_t memory, size_t &flush_queue_size,
274 bool &allocate_in_pages) const;
275
276 private:
277 /** Number of threads for bulk loading. */
278 const size_t m_num_threads{};
279
280 const size_t m_keynr{};
281
282 /** All thread specific data. */
284
285 /** Sub-tree loading contexts. */
287
288 /** Innodb dictionary table object. */
290
291 /** Index being loaded. Could be primary or secondary index. */
293
294 /** Allocator to extend tablespace and allocate extents. */
296
297 const trx_t *const m_trx{};
298
299 /** Flush queue size used by the Bulk_flusher */
301};
302
303inline std::string Loader::get_error_string() const {
304 std::string error;
305 for (auto &thr : m_ctxs) {
306 if (thr.get_error() != DB_SUCCESS) {
307 error = thr.get_error_string();
308 break;
309 }
310 }
311 return error;
312}
313
314inline int Loader::get_error_code() const {
315 int errcode = 0;
316 for (auto &thr : m_ctxs) {
317 errcode = thr.get_error_code();
318 if (errcode != 0) {
319 break;
320 }
321 }
322 return errcode;
323}
324
327 for (auto &thr : m_ctxs) {
328 e = thr.get_error();
329 if (e != DB_SUCCESS) {
330 break;
331 }
332 }
333 return e;
334}
335
336} // namespace ddl_bulk
Multi Threaded Index Build (MTIB) using BUF_BLOCK_MEMORY and dedicated Bulk_flusher threads.
void * Blob_context
Definition: bulk_data_service.h:49
Definition: btr0mtib.h:895
dberr_t open_blob(Blob_context &blob_ctx, lob::ref_t &ref)
Create a blob.
Definition: btr0mtib.h:908
dberr_t close_blob(Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushin...
Definition: btr0mtib.h:928
dberr_t write_blob(Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: btr0mtib.h:918
Definition: btr0mtib.h:479
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:300
Definition: ddl0bulk.h:45
std::list< Btree_multi::Btree_load * > m_list_subtrees
Each subtree needs to have a disjoint set of keys.
Definition: ddl0bulk.h:121
std::ostringstream m_sout
Definition: ddl0bulk.h:175
Btree_multi::Btree_load * get_subtree()
Get the last subtree created by this thread.
Definition: ddl0bulk.h:113
void free()
Free thread specific data.
Definition: ddl0bulk.cc:304
dberr_t m_err
Error code at thread level.
Definition: ddl0bulk.h:171
dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows, size_t row_index)
Fill the tuple to set the column data.
Definition: ddl0bulk.cc:442
dberr_t write_blob(Btree_multi::Btree_load *sub_tree, Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: ddl0bulk.h:79
dberr_t open_blob(Btree_multi::Btree_load *sub_tree, Blob_context &blob_ctx, lob::ref_t &ref)
Create a blob.
Definition: ddl0bulk.h:67
std::string get_error_string() const
Definition: ddl0bulk.h:100
dtuple_t * m_entry
Tuple for inserting row to cluster index.
Definition: ddl0bulk.h:159
dtuple_t * m_row
Tuple for converting input data to table row.
Definition: ddl0bulk.h:156
unsigned char m_rowid_data[DATA_ROW_ID_LEN]
Column data for system column DATA_ROW_ID.
Definition: ddl0bulk.h:168
int get_error_code() const
Get the client error code (eg.
Definition: ddl0bulk.h:104
mem_heap_t * m_heap
Heap for allocating tuple memory.
Definition: ddl0bulk.h:153
size_t m_nth_index
Definition: ddl0bulk.h:177
dberr_t close_blob(Btree_multi::Btree_load *sub_tree, Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushin...
Definition: ddl0bulk.h:90
int m_errcode
Definition: ddl0bulk.h:173
unsigned char m_rollptr_data[DATA_ROLL_PTR_LEN]
Column data for system column Roll pointer.
Definition: ddl0bulk.h:165
void add_subtree(Btree_multi::Btree_load *subtree)
Add given subtree to the list of subtrees.
Definition: ddl0bulk.h:108
dberr_t load(const row_prebuilt_t *prebuilt, Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree for a specific thread.
Definition: ddl0bulk.cc:179
uint64_t m_last_rowid
The last DB_ROW_ID used by this thread.
Definition: ddl0bulk.h:124
void init(const row_prebuilt_t *prebuilt)
Initialize thread specific data.
Definition: ddl0bulk.cc:46
dberr_t get_error() const
Definition: ddl0bulk.h:99
size_t m_queue_size
Flush queue size used by the Bulk_flusher.
Definition: ddl0bulk.h:116
void fill_index_entry(const row_prebuilt_t *prebuilt)
Fill the cluster index entry from tuple data.
Definition: ddl0bulk.cc:400
void fill_system_columns(const row_prebuilt_t *prebuilt)
Fill system columns for index entry to be loaded.
Definition: ddl0bulk.cc:378
bool store_int_col(const Column_mysql &col, byte *data_ptr, size_t &data_len)
Store integer column in Innodb format.
Definition: ddl0bulk.cc:586
unsigned char m_trx_data[DATA_TRX_ID_LEN]
Column data for system column transaction ID.
Definition: ddl0bulk.h:162
Definition: ddl0bulk.h:40
std::vector< Btree_multi::Btree_load *, ut::allocator< Btree_multi::Btree_load * > > Btree_loads
Definition: ddl0bulk.h:242
Btree_loads m_sub_tree_loads
Sub-tree loading contexts.
Definition: ddl0bulk.h:286
void get_queue_size(size_t memory, size_t &flush_queue_size, bool &allocate_in_pages) const
Calculate the flush queue size to be used based on the available memory.
Definition: ddl0bulk.cc:80
Loader(size_t num_threads, size_t keynr, const trx_t *trx)
Loader context constructor.
Definition: ddl0bulk.h:184
dict_index_t * m_index
Index being loaded.
Definition: ddl0bulk.h:292
const char * get_index_name() const
Definition: ddl0bulk.h:256
const trx_t *const m_trx
Definition: ddl0bulk.h:297
dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size, size_t memory)
Prepare bulk loading by multiple threads.
Definition: ddl0bulk.cc:104
Btree_multi::Bulk_extent_allocator m_extent_allocator
Allocator to extend tablespace and allocate extents.
Definition: ddl0bulk.h:295
dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree by a thread.
Definition: ddl0bulk.cc:142
const char * get_table_name() const
Definition: ddl0bulk.h:253
int get_error_code() const
Get the client error code (e.g.
Definition: ddl0bulk.h:314
size_t m_queue_size
Flush queue size used by the Bulk_flusher.
Definition: ddl0bulk.h:300
size_t get_keynr() const
Definition: ddl0bulk.h:205
dict_table_t * m_table
Innodb dictionary table object.
Definition: ddl0bulk.h:289
dberr_t merge_subtrees()
Merge the sub-trees to build the cluster index.
Definition: ddl0bulk.cc:650
dberr_t end(bool is_error)
Finish bulk load operation, combining the sub-trees produced by concurrent threads.
Definition: ddl0bulk.cc:311
void * Blob_context
Definition: ddl0bulk.h:42
dberr_t get_error() const
Definition: ddl0bulk.h:325
std::string get_error_string() const
Definition: ddl0bulk.h:303
dberr_t close_blob(size_t thread_index, Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushin...
Definition: ddl0bulk.cc:170
void set_sys_max_rowid(uint64_t max_rowid)
Ensure that dict_sys->row_id is greater than max rowid used in bulk load of this table.
Definition: ddl0bulk.cc:367
dberr_t write_blob(size_t thread_index, Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: ddl0bulk.cc:161
std::vector< Thread_data, ut::allocator< Thread_data > > Thread_ctxs
Definition: ddl0bulk.h:243
Thread_ctxs m_ctxs
All thread specific data.
Definition: ddl0bulk.h:283
dberr_t open_blob(size_t thread_index, Blob_context &blob_ctx, lob::ref_t &ref)
Open a blob.
Definition: ddl0bulk.cc:152
const size_t m_num_threads
Number of threads for bulk loading.
Definition: ddl0bulk.h:278
const size_t m_keynr
Definition: ddl0bulk.h:280
Allocator that allows std::* containers to manage their memory through ut::malloc* and ut::free libra...
Definition: ut0new.h:2183
constexpr size_t DATA_ROLL_PTR_LEN
Rollback data pointer type size in bytes.
Definition: data0type.h:191
constexpr size_t DATA_TRX_ID_LEN
Transaction ID type size in bytes.
Definition: data0type.h:185
constexpr uint32_t DATA_ROW_ID_LEN
stored length for row id
Definition: data0type.h:179
dberr_t
Definition: db0err.h:39
@ DB_SUCCESS
Definition: db0err.h:43
void error(const char *format,...)
PT & ref(PT *tp)
Definition: tablespace_impl.cc:359
Definition: ddl0bulk.cc:44
Definition: aligned_atomic.h:44
ValueType max(X &&first)
Definition: gtid.h:103
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2872
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2876
Interface between Innobase row operations and MySQL.
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:857
Definition: bulk_data_service.h:204
Data structure for an index.
Definition: dict0mem.h:1041
id_name_t name
index name
Definition: dict0mem.h:1049
Data structure for a database table.
Definition: dict0mem.h:1913
table_name_t name
Table name.
Definition: dict0mem.h:1988
Structure for an SQL data tuple of fields (logical record)
Definition: data0data.h:696
The struct 'lob::ref_t' represents an external field reference.
Definition: lob0lob.h:198
The info structure stored at the beginning of a heap block.
Definition: mem0mem.h:302
A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; th...
Definition: row0mysql.h:515
char * m_name
The name in internal representation.
Definition: dict0mem.h:467
Definition: trx0trx.h:675