MySQL 9.3.0
Source Code Documentation
ddl0bulk.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2022, 2025, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ddl0bulk.h
29BULK Data Load. Currently treated like DDL */
30
31#pragma once
32
33#include "btr0mtib.h"
34#include "row0mysql.h"
35#include "sql/handler.h"
36
37namespace ddl_bulk {
38
39class Loader {
40 public:
41 using Blob_context = void *;
42 using byte = unsigned char;
43
45 public:
46 /** Initialize thread specific data.
47 @param[in] prebuilt prebuilt structures from innodb table handler */
48 void init(const row_prebuilt_t *prebuilt);
49
50 /** Load rows to a sub-tree for a specific thread.
51 @param[in] prebuilt prebuilt structures from innodb table handler
52 @param[in,out] sub_tree sub tree to load data to
53 @param[in] rows rows to be loaded to the cluster index sub-tree
54 @param[in] wait_cbk Stat callbacks
55 @return innodb error code */
56 dberr_t load(const row_prebuilt_t *prebuilt,
57 Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows,
59
60 public:
61 /** Create a blob.
62 @param[in] sub_tree sub tree to load data to
63 @param[out] blob_ctx pointer to an opaque object representing a blob.
64 @param[out] ref blob reference to be placed in the record.
65 @return DB_SUCCESS on success or a failure error code. */
67 lob::ref_t &ref) {
68 return sub_tree->open_blob(blob_ctx, ref);
69 }
70
71 /** Write data into the blob.
72 @param[in] sub_tree sub tree to load data to
73 @param[in] blob_ctx pointer to blob into which data is written.
74 @param[out] ref blob reference to be placed in the record.
75 @param[in] data buffer containing data to be written
76 @param[in] len length of the data to be written.
77 @return DB_SUCCESS on success or a failure error code. */
79 lob::ref_t &ref, const byte *data, size_t len) {
80 return sub_tree->write_blob(blob_ctx, ref, data, len);
81 }
82
83 /** Indicate that the blob has been completed, so that resources can be
84 removed, and as necessary flushing can be done.
85 @param[in] sub_tree sub tree to load data to
86 @param[in] blob_ctx pointer to blob which has been completely written.
87 @param[out] ref blob reference to be placed in the record.
88 @return DB_SUCCESS on success or a failure error code. */
90 lob::ref_t &ref) {
91 return sub_tree->close_blob(blob_ctx, ref);
92 }
93
94 public:
95 /** Free thread specific data. */
96 void free();
97
98 dberr_t get_error() const { return m_err; }
99 std::string get_error_string() const { return m_sout.str(); }
100
101 /** Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
102 @return the client error code. */
103 int get_error_code() const { return m_errcode; }
104
105 private:
106 /** Fill system columns for index entry to be loaded.
107 @param[in] prebuilt prebuilt structures from innodb table handler */
108 void fill_system_columns(const row_prebuilt_t *prebuilt);
109
110 /** Fill the tuple to set the column data
111 @param[in] prebuilt prebuilt structures from innodb table handler
112 @param[in] rows sql rows with column data
113 @param[in] row_index current row index
114 @return innodb error code. */
115 dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows,
116 size_t row_index);
117
118 /** Fill the cluster index entry from tuple data.
119 @param[in] prebuilt prebuilt structures from innodb table handler */
120 void fill_index_entry(const row_prebuilt_t *prebuilt);
121
122 /** Store integer column in Innodb format.
123 @param[in] col sql column data
124 @param[in,out] data_ptr data buffer for storing converted data
125 @param[in,out] data_len data buffer length
126 @return true if successful. */
127 bool store_int_col(const Column_mysql &col, byte *data_ptr,
128 size_t &data_len);
129
130 private:
131 /** Heap for allocating tuple memory. */
133
134 /** Tuple for converting input data to table row. */
136
137 /** Tuple for inserting row to cluster index. */
139
140 /** Column data for system column transaction ID. */
142
143 /** Column data for system column Roll pointer. */
145
146 /** Error code at thread level. */
148
149 int m_errcode{0};
150
152
154 };
155
156 /** Loader context constructor.
157 @param[in] num_threads Number of threads to use for bulk loading
158 @param[in] keynr index number
159 @param[in] trx transaction context. */
160 Loader(size_t num_threads, size_t keynr, const trx_t *trx)
161 : m_num_threads(num_threads), m_keynr(keynr), m_trx(trx) {}
162
163 /** Prepare bulk loading by multiple threads.
164 @param[in] prebuilt prebuilt structures from innodb table handler
165 @param[in] data_size total data size to load in bytes
166 @param[in] memory memory to be used from buffer pool
167 @return innodb error code */
168 dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size,
169 size_t memory);
170
171 /** Load rows to a sub-tree by a thread. Called concurrently by multiple
172 execution threads.
173 @param[in] prebuilt prebuilt structures from innodb table handler
174 @param[in] thread_index identifies the thread and the B-tree to use.
175 @param[in] rows rows to be loaded to the cluster index sub-tree
176 @param[in] wait_cbk Stat callbacks
177 @return innodb error code */
178 dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index,
179 const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk);
180
181 size_t get_keynr() const { return m_keynr; }
182
183 public:
184 /** Open a blob.
185 @param[in] thread_index identifies the thread and the B-tree to use.
186 @param[out] blob_ctx pointer to an opaque object representing a blob.
187 @param[out] ref blob reference to be placed in the record.
188 @return DB_SUCCESS on success or a failure error code. */
189 dberr_t open_blob(size_t thread_index, Blob_context &blob_ctx,
190 lob::ref_t &ref);
191
192 /** Write data into the blob.
193 @param[in] thread_index identifies the thread and the B-tree to use.
194 @param[in] blob_ctx pointer to blob into which data is written.
195 @param[out] ref blob reference to be placed in the record.
196 @param[in] data buffer containing data to be written
197 @param[in] len length of the data to be written.
198 @return DB_SUCCESS on success or a failure error code. */
199 dberr_t write_blob(size_t thread_index, Blob_context blob_ctx,
200 lob::ref_t &ref, const byte *data, size_t len);
201
202 /** Indicate that the blob has been completed, so that resources can be
203 removed, and as necessary flushing can be done.
204 @param[in] thread_index identifies the thread and the B-tree to use.
205 @param[in] blob_ctx pointer to blob which has been completely written.
206 @param[out] ref blob reference to be placed in the record.
207 @return DB_SUCCESS on success or a failure error code. */
208 dberr_t close_blob(size_t thread_index, Blob_context blob_ctx,
209 lob::ref_t &ref);
210
211 /** Finish bulk load operation, combining the sub-trees produced by concurrent
212 threads.
213 @param[in] is_error true if called for cleanup and rollback after an error
214 @return innodb error code */
215 dberr_t end(bool is_error);
216
219 using Thread_ctxs = std::vector<Thread_data, ut::allocator<Thread_data>>;
220
221 dberr_t get_error() const;
222 std::string get_error_string() const;
223
224 /** Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
225 @return the client error code. */
226 int get_error_code() const;
227
228 /** @return table name where the data is being loaded. */
229 const char *get_table_name() const { return m_table->name.m_name; }
230
231 /** @return index name where the data is being loaded. */
232 const char *get_index_name() const { return m_index->name(); }
233
234 private:
235 /** Merge the sub-trees to build the cluster index.
236 @return innodb error code. */
238
239 /** Calculate the flush queue size to be used based on the available memory.
240 @param[in] memory total buffer pool memory to use
241 @param[out] flush_queue_size calculated queue size
242 @param[out] allocate_in_pages true if need to allocate in pages
243 false if need to allocate in extents */
244 void get_queue_size(size_t memory, size_t &flush_queue_size,
245 bool &allocate_in_pages) const;
246
247 private:
248 /** Number of threads for bulk loading. */
249 const size_t m_num_threads{};
250
251 const size_t m_keynr{};
252
253 /** All thread specific data. */
255
256 /** Sub-tree loading contexts. */
258
259 /** Innodb dictionary table object. */
261
262 /** Index being loaded. Could be primary or secondary index. */
264
265 /** Allocator to extend tablespace and allocate extents. */
267
268 const trx_t *const m_trx{};
269};
270
271inline std::string Loader::get_error_string() const {
272 std::string error;
273 for (auto &thr : m_ctxs) {
274 if (thr.get_error() != DB_SUCCESS) {
275 error = thr.get_error_string();
276 break;
277 }
278 }
279 return error;
280}
281
282inline int Loader::get_error_code() const {
283 int errcode = 0;
284 for (auto &thr : m_ctxs) {
285 errcode = thr.get_error_code();
286 if (errcode != 0) {
287 break;
288 }
289 }
290 return errcode;
291}
292
295 for (auto &thr : m_ctxs) {
296 e = thr.get_error();
297 if (e != DB_SUCCESS) {
298 break;
299 }
300 }
301 return e;
302}
303
304} // namespace ddl_bulk
Multi Threaded Index Build (MTIB) using BUF_BLOCK_MEMORY and dedicated Bulk_flusher threads.
void * Blob_context
Definition: bulk_data_service.h:49
Definition: btr0mtib.h:895
dberr_t open_blob(Blob_context &blob_ctx, lob::ref_t &ref)
Create a blob.
Definition: btr0mtib.h:908
dberr_t close_blob(Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushin...
Definition: btr0mtib.h:928
dberr_t write_blob(Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: btr0mtib.h:918
Definition: btr0mtib.h:479
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:292
Definition: ddl0bulk.h:44
std::ostringstream m_sout
Definition: ddl0bulk.h:151
void free()
Free thread specific data.
Definition: ddl0bulk.cc:304
dberr_t m_err
Error code at thread level.
Definition: ddl0bulk.h:147
dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows, size_t row_index)
Fill the tuple to set the column data.
Definition: ddl0bulk.cc:411
dberr_t write_blob(Btree_multi::Btree_load *sub_tree, Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: ddl0bulk.h:78
dberr_t open_blob(Btree_multi::Btree_load *sub_tree, Blob_context &blob_ctx, lob::ref_t &ref)
Create a blob.
Definition: ddl0bulk.h:66
std::string get_error_string() const
Definition: ddl0bulk.h:99
dtuple_t * m_entry
Tuple for inserting row to cluster index.
Definition: ddl0bulk.h:138
dtuple_t * m_row
Tuple for converting input data to table row.
Definition: ddl0bulk.h:135
int get_error_code() const
Get the client error code (e.g.
Definition: ddl0bulk.h:103
mem_heap_t * m_heap
Heap for allocating tuple memory.
Definition: ddl0bulk.h:132
size_t m_nth_index
Definition: ddl0bulk.h:153
dberr_t close_blob(Btree_multi::Btree_load *sub_tree, Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushin...
Definition: ddl0bulk.h:89
int m_errcode
Definition: ddl0bulk.h:149
unsigned char m_rollptr_data[DATA_ROLL_PTR_LEN]
Column data for system column Roll pointer.
Definition: ddl0bulk.h:144
dberr_t load(const row_prebuilt_t *prebuilt, Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree for a specific thread.
Definition: ddl0bulk.cc:186
void init(const row_prebuilt_t *prebuilt)
Initialize thread specific data.
Definition: ddl0bulk.cc:46
dberr_t get_error() const
Definition: ddl0bulk.h:98
void fill_index_entry(const row_prebuilt_t *prebuilt)
Fill the cluster index entry from tuple data.
Definition: ddl0bulk.cc:369
void fill_system_columns(const row_prebuilt_t *prebuilt)
Fill system columns for index entry to be loaded.
Definition: ddl0bulk.cc:351
bool store_int_col(const Column_mysql &col, byte *data_ptr, size_t &data_len)
Store integer column in Innodb format.
Definition: ddl0bulk.cc:525
unsigned char m_trx_data[DATA_TRX_ID_LEN]
Column data for system column transaction ID.
Definition: ddl0bulk.h:141
Definition: ddl0bulk.h:39
std::vector< Btree_multi::Btree_load *, ut::allocator< Btree_multi::Btree_load * > > Btree_loads
Definition: ddl0bulk.h:218
Btree_loads m_sub_tree_loads
Sub-tree loading contexts.
Definition: ddl0bulk.h:257
void get_queue_size(size_t memory, size_t &flush_queue_size, bool &allocate_in_pages) const
Calculate the flush queue size to be used based on the available memory.
Definition: ddl0bulk.cc:80
Loader(size_t num_threads, size_t keynr, const trx_t *trx)
Loader context constructor.
Definition: ddl0bulk.h:160
dict_index_t * m_index
Index being loaded.
Definition: ddl0bulk.h:263
const char * get_index_name() const
Definition: ddl0bulk.h:232
const trx_t *const m_trx
Definition: ddl0bulk.h:268
dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size, size_t memory)
Prepare bulk loading by multiple threads.
Definition: ddl0bulk.cc:104
Btree_multi::Bulk_extent_allocator m_extent_allocator
Allocator to extend tablespace and allocate extents.
Definition: ddl0bulk.h:266
dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree by a thread.
Definition: ddl0bulk.cc:141
const char * get_table_name() const
Definition: ddl0bulk.h:229
int get_error_code() const
Get the client error code (e.g.
Definition: ddl0bulk.h:282
size_t get_keynr() const
Definition: ddl0bulk.h:181
dict_table_t * m_table
Innodb dictionary table object.
Definition: ddl0bulk.h:260
dberr_t merge_subtrees()
Merge the sub-trees to build the cluster index.
Definition: ddl0bulk.cc:589
dberr_t end(bool is_error)
Finish bulk load operation, combining the sub-trees produced by concurrent threads.
Definition: ddl0bulk.cc:311
void * Blob_context
Definition: ddl0bulk.h:41
dberr_t get_error() const
Definition: ddl0bulk.h:293
std::string get_error_string() const
Definition: ddl0bulk.h:271
dberr_t close_blob(size_t thread_index, Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushin...
Definition: ddl0bulk.cc:175
dberr_t write_blob(size_t thread_index, Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: ddl0bulk.cc:164
std::vector< Thread_data, ut::allocator< Thread_data > > Thread_ctxs
Definition: ddl0bulk.h:219
Thread_ctxs m_ctxs
All thread specific data.
Definition: ddl0bulk.h:254
dberr_t open_blob(size_t thread_index, Blob_context &blob_ctx, lob::ref_t &ref)
Open a blob.
Definition: ddl0bulk.cc:153
const size_t m_num_threads
Number of threads for bulk loading.
Definition: ddl0bulk.h:249
const size_t m_keynr
Definition: ddl0bulk.h:251
Allocator that allows std::* containers to manage their memory through ut::malloc* and ut::free libra...
Definition: ut0new.h:2183
constexpr size_t DATA_ROLL_PTR_LEN
Rollback data pointer type size in bytes.
Definition: data0type.h:191
constexpr size_t DATA_TRX_ID_LEN
Transaction ID type size in bytes.
Definition: data0type.h:185
dberr_t
Definition: db0err.h:39
@ DB_SUCCESS
Definition: db0err.h:43
void error(const char *format,...)
PT & ref(PT *tp)
Definition: tablespace_impl.cc:359
Definition: ddl0bulk.cc:44
Definition: aligned_atomic.h:44
ValueType max(X &&first)
Definition: gtid.h:103
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2872
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2876
Interface between Innobase row operations and MySQL.
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:840
Definition: bulk_data_service.h:196
Data structure for an index.
Definition: dict0mem.h:1041
id_name_t name
index name
Definition: dict0mem.h:1049
Data structure for a database table.
Definition: dict0mem.h:1913
table_name_t name
Table name.
Definition: dict0mem.h:1988
Structure for an SQL data tuple of fields (logical record)
Definition: data0data.h:696
The struct 'lob::ref_t' represents an external field reference.
Definition: lob0lob.h:198
The info structure stored at the beginning of a heap block.
Definition: mem0mem.h:302
A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; th...
Definition: row0mysql.h:515
char * m_name
The name in internal representation.
Definition: dict0mem.h:467
Definition: trx0trx.h:675