MySQL 9.1.0
Source Code Documentation
ddl0bulk.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2022, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ddl0bulk.h
29BULK Data Load. Currently treated like DDL */
30
31#pragma once
32
33#include "btr0mtib.h"
34#include "row0mysql.h"
35#include "sql/handler.h"
36
37namespace ddl_bulk {
38
39class Loader {
40 public:
41 using Blob_context = void *;
42 using byte = unsigned char;
43
45 public:
46 /** Initialize thread specific data.
47 @param[in] prebuilt prebuilt structures from innodb table handler */
48 void init(const row_prebuilt_t *prebuilt);
49
50 /** Load rows to a sub-tree for a specific thread.
51 @param[in] prebuilt prebuilt structures from innodb table handler
52 @param[in,out] sub_tree sub tree to load data to
53 @param[in] rows rows to be loaded to the cluster index sub-tree
54 @param[in] wait_cbk Stat callbacks
55 @return innodb error code */
56 dberr_t load(const row_prebuilt_t *prebuilt,
57 Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows,
59
60 public:
61 /** Create a blob.
62 @param[in] sub_tree sub tree to load data to
63 @param[out] blob_ctx pointer to an opaque object representing a blob.
64 @param[out] ref blob reference to be placed in the record.
65 @return DB_SUCCESS on success or a failure error code. */
67 lob::ref_t &ref) {
68 return sub_tree->open_blob(blob_ctx, ref);
69 }
70
71 /** Write data into the blob.
72 @param[in] sub_tree sub tree to load data to
73 @param[in] blob_ctx pointer to blob into which data is written.
74 @param[out] ref blob reference to be placed in the record.
75 @param[in] data buffer containing data to be written
76 @param[in] len length of the data to be written.
77 @return DB_SUCCESS on success or a failure error code. */
79 lob::ref_t &ref, const byte *data, size_t len) {
80 return sub_tree->write_blob(blob_ctx, ref, data, len);
81 }
82
83 /** Indicate that the blob has been completed, so that resources can be
84 removed, and as necessary flushing can be done.
85 @param[in] sub_tree sub tree to load data to
86 @param[in] blob_ctx pointer to blob which has been completely written.
87 @param[out] ref blob reference to be placed in the record.
88 @return DB_SUCCESS on success or a failure error code. */
90 lob::ref_t &ref) {
91 return sub_tree->close_blob(blob_ctx, ref);
92 }
93
94 public:
95 /** Free thread specific data. */
96 void free();
97
/** @return the error code stored for this thread (m_err). */
98 dberr_t get_error() const { return m_err; }
/** @return the current contents of the m_sout stream, returned by value. */
99 std::string get_error_string() const { return m_sout.str(); }
100
101 /** Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
102 @return the client error code; m_errcode is initialized to 0. */
103 int get_error_code() const { return m_errcode; }
104
105 private:
106 /** Fill system columns for index entry to be loaded.
107 @param[in] prebuilt prebuilt structures from innodb table handler */
108 void fill_system_columns(const row_prebuilt_t *prebuilt);
109
110 /** Fill the tuple to set the column data
111 @param[in] prebuilt prebuilt structures from innodb table handler
112 @param[in] rows sql rows with column data
113 @param[in] row_index current row index
114 @return innodb error code. */
115 dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows,
116 size_t row_index);
117
118 /** Fill the cluster index entry from tuple data.
119 @param[in] prebuilt prebuilt structures from innodb table handler */
120 void fill_index_entry(const row_prebuilt_t *prebuilt);
121
122 /** Store integer column in Innodb format.
123 @param[in] col sql column data
124 @param[in,out] data_ptr data buffer for storing converted data
125 @param[in,out] data_len data buffer length
126 @return true if successful. */
127 bool store_int_col(const Column_mysql &col, byte *data_ptr,
128 size_t &data_len);
129
130 private:
131 /** Heap for allocating tuple memory. */
133
134 /** Tuple for converting input data to table row. */
136
137 /** Tuple for inserting row to cluster index. */
139
140 /** Column data for system column transaction ID. */
142
143 /** Column data for system column Roll pointer. */
145
146 /** Error code at thread level. */
148
149 int m_errcode{0};
150
152 };
153
154 /** Loader context constructor.
155 @param[in] num_threads Number of threads to use for bulk loading */
/* The inline body only stores num_threads in m_num_threads.
NOTE(review): this single-argument constructor is not marked explicit, so a
size_t converts implicitly to Loader - confirm that this is intended. */
156 Loader(size_t num_threads) : m_num_threads(num_threads) {}
157
158 /** Prepare bulk loading by multiple threads.
159 @param[in] prebuilt prebuilt structures from innodb table handler
160 @param[in] data_size total data size to load in bytes
161 @param[in] memory memory to be used from buffer pool
162 @return innodb error code */
163 dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size,
164 size_t memory);
165
166 /** Load rows to a sub-tree by a thread. Called concurrently by multiple
167 execution threads.
168 @param[in] prebuilt prebuilt structures from innodb table handler
169 @param[in] thread_index identifies the thread and the B-tree to use.
170 @param[in] rows rows to be loaded to the cluster index sub-tree
171 @param[in] wait_cbk Stat callbacks
172 @return innodb error code */
173 dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index,
174 const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk);
175
176 public:
177 /** Open a blob.
178 @param[in] thread_index identifies the thread and the B-tree to use.
179 @param[out] blob_ctx pointer to an opaque object representing a blob.
180 @param[out] ref blob reference to be placed in the record.
181 @return DB_SUCCESS on success or a failure error code. */
182 dberr_t open_blob(size_t thread_index, Blob_context &blob_ctx,
183 lob::ref_t &ref);
184
185 /** Write data into the blob.
186 @param[in] thread_index identifies the thread and the B-tree to use.
187 @param[in] blob_ctx pointer to blob into which data is written.
188 @param[out] ref blob reference to be placed in the record.
189 @param[in] data buffer containing data to be written
190 @param[in] len length of the data to be written.
191 @return DB_SUCCESS on success or a failure error code. */
192 dberr_t write_blob(size_t thread_index, Blob_context blob_ctx,
193 lob::ref_t &ref, const byte *data, size_t len);
194
195 /** Indicate that the blob has been completed, so that resources can be
196 removed, and as necessary flushing can be done.
197 @param[in] thread_index identifies the thread and the B-tree to use.
198 @param[in] blob_ctx pointer to blob which has been completely written.
199 @param[out] ref blob reference to be placed in the record.
200 @return DB_SUCCESS on success or a failure error code. */
201 dberr_t close_blob(size_t thread_index, Blob_context blob_ctx,
202 lob::ref_t &ref);
203
204 /** Finish bulk load operation, combining the sub-trees produced by concurrent
205 threads.
206 @param[in] prebuilt prebuilt structures from innodb table handler
207 @param[in] is_error true if called for cleanup and rollback after an error
208 @return innodb error code */
209 dberr_t end(const row_prebuilt_t *prebuilt, bool is_error);
210
213 using Thread_ctxs = std::vector<Thread_data, ut::allocator<Thread_data>>;
214
215 dberr_t get_error() const;
216 std::string get_error_string() const;
217
218 /** Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
219 @return the client error code. */
220 int get_error_code() const;
221
222 /** @return table name where the data is being loaded. */
/* Returns the m_name pointer held by m_table->name; no copy is made.
NOTE(review): m_table is dereferenced without a null check - presumably it is
set before this is called (possibly in begin()); confirm against callers. */
223 const char *get_table_name() const { return m_table->name.m_name; }
224
225 /** @return index name where the data is being loaded. */
/* The name is taken from the index returned by m_table->first_index().
NOTE(review): neither m_table nor the returned index pointer is checked for
null here - presumably both are valid once loading has begun; confirm. */
226 const char *get_index_name() const {
227 auto index = m_table->first_index();
228 return index->name();
229 }
230
231 private:
232 /** Merge the sub-trees to build the cluster index.
233 @param[in] prebuilt prebuilt structures from innodb table handler
234 @return innodb error code. */
235 dberr_t merge_subtrees(const row_prebuilt_t *prebuilt);
236
237 /** Calculate the flush queue size to be used based on the available memory.
238 @param[in] memory total buffer pool memory to use
239 @param[out] flush_queue_size calculated queue size
240 @param[out] allocate_in_pages true if need to allocate in pages
241 false if need to allocate in extents */
242 void get_queue_size(size_t memory, size_t &flush_queue_size,
243 bool &allocate_in_pages) const;
244
245 private:
246 /** Number of threads for bulk loading. */
248
249 /** All thread specific data. */
251
252 /** Sub-tree loading contexts. */
254
255 /** Innodb dictionary table object. */
257
258 /** Allocator to extend tablespace and allocate extents. */
260};
261
/** Get the error text of the first thread context, in m_ctxs order, whose
error code is not DB_SUCCESS.
@return that context's error string, or an empty string if no context reports
an error (including when m_ctxs is empty). */
262inline std::string Loader::get_error_string() const {
263 std::string error;
264 for (auto &thr : m_ctxs) {
265 if (thr.get_error() != DB_SUCCESS) {
266 error = thr.get_error_string();
/* Only the first failing context is reported; later ones are not visited. */
267 break;
268 }
269 }
270 return error;
271}
272
/** Get the first non-zero client error code among the thread contexts,
scanning m_ctxs in order.
@return the first non-zero client error code, or 0 if every context reports 0
(including when m_ctxs is empty). */
273inline int Loader::get_error_code() const {
274 int errcode = 0;
275 for (auto &thr : m_ctxs) {
276 errcode = thr.get_error_code();
/* Stop at the first context that has a client error code set. */
277 if (errcode != 0) {
278 break;
279 }
280 }
281 return errcode;
282}
283
286 for (auto &thr : m_ctxs) {
287 e = thr.get_error();
288 if (e != DB_SUCCESS) {
289 break;
290 }
291 }
292 return e;
293}
294
295} // namespace ddl_bulk
Multi Threaded Index Build (MTIB) using BUF_BLOCK_MEMORY and dedicated Bulk_flusher threads.
void * Blob_context
Definition: bulk_data_service.h:49
Definition: btr0mtib.h:893
dberr_t open_blob(Blob_context &blob_ctx, lob::ref_t &ref)
Create a blob.
Definition: btr0mtib.h:906
dberr_t close_blob(Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushing can be done.
Definition: btr0mtib.h:926
dberr_t write_blob(Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: btr0mtib.h:916
Definition: btr0mtib.h:479
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:260
Definition: ddl0bulk.h:44
std::ostringstream m_sout
Definition: ddl0bulk.h:151
void free()
Free thread specific data.
Definition: ddl0bulk.cc:294
dberr_t m_err
Error code at thread level.
Definition: ddl0bulk.h:147
dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows, size_t row_index)
Fill the tuple to set the column data.
Definition: ddl0bulk.cc:436
dberr_t write_blob(Btree_multi::Btree_load *sub_tree, Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: ddl0bulk.h:78
dberr_t open_blob(Btree_multi::Btree_load *sub_tree, Blob_context &blob_ctx, lob::ref_t &ref)
Create a blob.
Definition: ddl0bulk.h:66
std::string get_error_string() const
Definition: ddl0bulk.h:99
dtuple_t * m_entry
Tuple for inserting row to cluster index.
Definition: ddl0bulk.h:138
dtuple_t * m_row
Tuple for converting input data to table row.
Definition: ddl0bulk.h:135
int get_error_code() const
Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
Definition: ddl0bulk.h:103
mem_heap_t * m_heap
Heap for allocating tuple memory.
Definition: ddl0bulk.h:132
dberr_t close_blob(Btree_multi::Btree_load *sub_tree, Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushing can be done.
Definition: ddl0bulk.h:89
int m_errcode
Definition: ddl0bulk.h:149
unsigned char m_rollptr_data[DATA_ROLL_PTR_LEN]
Column data for system column Roll pointer.
Definition: ddl0bulk.h:144
dberr_t load(const row_prebuilt_t *prebuilt, Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree for a specific thread.
Definition: ddl0bulk.cc:177
void init(const row_prebuilt_t *prebuilt)
Initialize thread specific data.
Definition: ddl0bulk.cc:46
dberr_t get_error() const
Definition: ddl0bulk.h:98
void fill_index_entry(const row_prebuilt_t *prebuilt)
Fill the cluster index entry from tuple data.
Definition: ddl0bulk.cc:411
void fill_system_columns(const row_prebuilt_t *prebuilt)
Fill system columns for index entry to be loaded.
Definition: ddl0bulk.cc:393
bool store_int_col(const Column_mysql &col, byte *data_ptr, size_t &data_len)
Store integer column in Innodb format.
Definition: ddl0bulk.cc:561
unsigned char m_trx_data[DATA_TRX_ID_LEN]
Column data for system column transaction ID.
Definition: ddl0bulk.h:141
Definition: ddl0bulk.h:39
std::vector< Btree_multi::Btree_load *, ut::allocator< Btree_multi::Btree_load * > > Btree_loads
Definition: ddl0bulk.h:212
Btree_loads m_sub_tree_loads
Sub-tree loading contexts.
Definition: ddl0bulk.h:253
void get_queue_size(size_t memory, size_t &flush_queue_size, bool &allocate_in_pages) const
Calculate the flush queue size to be used based on the available memory.
Definition: ddl0bulk.cc:71
dberr_t merge_subtrees(const row_prebuilt_t *prebuilt)
Merge the sub-trees to build the cluster index.
Definition: ddl0bulk.cc:625
dberr_t end(const row_prebuilt_t *prebuilt, bool is_error)
Finish bulk load operation, combining the sub-trees produced by concurrent threads.
Definition: ddl0bulk.cc:301
Loader(size_t num_threads)
Loader context constructor.
Definition: ddl0bulk.h:156
const char * get_index_name() const
Definition: ddl0bulk.h:226
dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size, size_t memory)
Prepare bulk loading by multiple threads.
Definition: ddl0bulk.cc:95
Btree_multi::Bulk_extent_allocator m_extent_allocator
Allocator to extend tablespace and allocate extents.
Definition: ddl0bulk.h:259
size_t m_num_threads
Number of threads for bulk loading.
Definition: ddl0bulk.h:247
dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree by a thread.
Definition: ddl0bulk.cc:132
const char * get_table_name() const
Definition: ddl0bulk.h:223
int get_error_code() const
Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
Definition: ddl0bulk.h:273
dict_table_t * m_table
Innodb dictionary table object.
Definition: ddl0bulk.h:256
void * Blob_context
Definition: ddl0bulk.h:41
dberr_t get_error() const
Definition: ddl0bulk.h:284
std::string get_error_string() const
Definition: ddl0bulk.h:262
dberr_t close_blob(size_t thread_index, Blob_context blob_ctx, lob::ref_t &ref)
Indicate that the blob has been completed, so that resources can be removed, and as necessary flushing can be done.
Definition: ddl0bulk.cc:166
dberr_t write_blob(size_t thread_index, Blob_context blob_ctx, lob::ref_t &ref, const byte *data, size_t len)
Write data into the blob.
Definition: ddl0bulk.cc:155
std::vector< Thread_data, ut::allocator< Thread_data > > Thread_ctxs
Definition: ddl0bulk.h:213
Thread_ctxs m_ctxs
All thread specific data.
Definition: ddl0bulk.h:250
dberr_t open_blob(size_t thread_index, Blob_context &blob_ctx, lob::ref_t &ref)
Open a blob.
Definition: ddl0bulk.cc:144
Allocator that allows std::* containers to manage their memory through ut::malloc* and ut::free libra...
Definition: ut0new.h:2183
constexpr size_t DATA_ROLL_PTR_LEN
Rollback data pointer type size in bytes.
Definition: data0type.h:191
constexpr size_t DATA_TRX_ID_LEN
Transaction ID type size in bytes.
Definition: data0type.h:185
dberr_t
Definition: db0err.h:39
@ DB_SUCCESS
Definition: db0err.h:43
void error(const char *format,...)
PT & ref(PT *tp)
Definition: tablespace_impl.cc:359
Definition: ddl0bulk.cc:44
Definition: aligned_atomic.h:44
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2872
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2876
Interface between Innobase row operations and MySQL.
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:698
Definition: bulk_data_service.h:193
id_name_t name
index name
Definition: dict0mem.h:1049
Data structure for a database table.
Definition: dict0mem.h:1904
table_name_t name
Table name.
Definition: dict0mem.h:1979
const dict_index_t * first_index() const
Definition: dict0mem.h:2462
Structure for an SQL data tuple of fields (logical record)
Definition: data0data.h:696
The struct 'lob::ref_t' represents an external field reference.
Definition: lob0lob.h:198
The info structure stored at the beginning of a heap block.
Definition: mem0mem.h:302
A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; th...
Definition: row0mysql.h:515
char * m_name
The name in internal representation.
Definition: dict0mem.h:467