MySQL 8.4.2
Source Code Documentation
ddl0bulk.h
Go to the documentation of this file.
1/*****************************************************************************
2
3Copyright (c) 2022, 2024, Oracle and/or its affiliates.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is designed to work with certain software (including
10but not limited to OpenSSL) that is licensed under separate terms,
11as designated in a particular file or component or in included license
12documentation. The authors of MySQL hereby grant you an additional
13permission to link the program and your derivative works with the
14separately licensed software that they have either included with
15the program or referenced in the documentation.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25
26*****************************************************************************/
27
28/** @file include/ddl0bulk.h
29BULK Data Load. Currently treated like DDL */
30
31#pragma once
32
33#include "btr0mtib.h"
34#include "row0mysql.h"
35#include "sql/handler.h"
36
37namespace ddl_bulk {
38
39class Loader {
40 public:
42 public:
43 /** Initialize thread specific data.
44 @param[in] prebuilt prebuilt structures from innodb table handler */
45 void init(const row_prebuilt_t *prebuilt);
46
47 /** Load rows to a sub-tree for a specific thread.
48 @param[in] prebuilt prebuilt structures from innodb table handler
49 @param[in,out] sub_tree sub tree to load data to
50 @param[in] rows rows to be loaded to the cluster index sub-tree
51 @param[in] wait_cbk Stat callbacks
52 @return innodb error code */
53 dberr_t load(const row_prebuilt_t *prebuilt,
54 Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows,
56
57 /** Free thread specific data. */
58 void free();
59
60 dberr_t get_error() const { return m_err; }
61 std::string get_error_string() const { return m_sout.str(); }
62
63 /** Get the client error code (eg. ER_LOAD_BULK_DATA_UNSORTED).
64 @return the client error code. */
65 int get_error_code() const { return m_errcode; }
66
67 private:
68 /** Fill system columns for index entry to be loaded.
69 @param[in] prebuilt prebuilt structures from innodb table handler */
70 void fill_system_columns(const row_prebuilt_t *prebuilt);
71
72 /** Fill the tuple to set the column data
73 @param[in] prebuilt prebuilt structures from innodb table handler
74 @param[in] rows sql rows with column data
75 @param[in] row_index current row index
76 @return innodb error code. */
77 dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows,
78 size_t row_index);
79
80 /** Fill he cluster index entry from tuple data.
81 @param[in] prebuilt prebuilt structures from innodb table handler */
82 void fill_index_entry(const row_prebuilt_t *prebuilt);
83
84 /** Store integer column in Innodb format.
85 @param[in] col sql column data
86 @param[in,out] data_ptr data buffer for storing converted data
87 @param[in,out] data_len data buffer length
88 @return true if successful. */
89 bool store_int_col(const Column_mysql &col, byte *data_ptr,
90 size_t &data_len);
91
92 private:
93 /** Heap for allocating tuple memory. */
95
96 /** Tuple for converting input data to table row. */
98
99 /** Tuple for inserting row to cluster index. */
101
102 /** Column data for system column transaction ID. */
104
105 /** Column data for system column Roll pointer. */
107
108 /** Error code at thread level. */
110
111 int m_errcode{0};
112
114 };
115
116 /** Loader context constructor.
117 @param[in] num_threads Number of threads to use for bulk loading */
118 Loader(size_t num_threads) : m_num_threads(num_threads) {}
119
120 /** Prepare bulk loading by multiple threads.
121 @param[in] prebuilt prebuilt structures from innodb table handler
122 @param[in] data_size total data size to load in bytes
123 @param[in] memory memory to be used from buffer pool
124 @return innodb error code */
125 dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size,
126 size_t memory);
127
128 /** Load rows to a sub-tree by a thread. Called concurrently by multiple
129 execution threads.
130 @param[in] prebuilt prebuilt structures from innodb table handler
131 @param[in] thread_index true if called for cleanup and rollback after an
132 error
133 @param[in] rows rows to be loaded to the cluster index sub-tree
134 @param[in] wait_cbk Stat callbacks
135 @return innodb error code */
136 dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index,
137 const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk);
138
139 /** Finish bulk load operation, combining the sub-trees produced by concurrent
140 threads.
141 @param[in] prebuilt prebuilt structures from innodb table handler
142 @param[in] is_error true if called for cleanup and rollback after an error
143 @return innodb error code */
144 dberr_t end(const row_prebuilt_t *prebuilt, bool is_error);
145
148 using Thread_ctxs = std::vector<Thread_data, ut::allocator<Thread_data>>;
149
150 dberr_t get_error() const;
151 std::string get_error_string() const;
152
153 /** Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
154 @return the client error code. */
155 int get_error_code() const;
156
157 /** @return table name where the data is being loaded. */
158 const char *get_table_name() const { return m_table->name.m_name; }
159
160 /** @return index name where the data is being loaded. */
161 const char *get_index_name() const {
162 auto index = m_table->first_index();
163 return index->name();
164 }
165
166 private:
167 /** Merge the sub-trees to build the cluster index.
168 @param[in] prebuilt prebuilt structures from innodb table handler
169 @return innodb error code. */
170 dberr_t merge_subtrees(const row_prebuilt_t *prebuilt);
171
172 /** Calculate the flush queue size to be used based on the available memory.
173 @param[in] memory total buffer pool memory to use
174 @param[out] flush_queue_size calculated queue size
175 @param[out] allocate_in_pages true if need to allocate in pages
176 false if need to allocate in extents */
177 void get_queue_size(size_t memory, size_t &flush_queue_size,
178 bool &allocate_in_pages) const;
179
180 private:
181 /** Number of threads for bulk loading. */
183
184 /** All thread specific data. */
186
187 /** Sub-tree loading contexts. */
189
190 /** Innodb dictionary table object. */
192
193 /** Allocator to extend tablespace and allocate extents. */
195};
196
197inline std::string Loader::get_error_string() const {
198 std::string error;
199 for (auto &thr : m_ctxs) {
200 if (thr.get_error() != DB_SUCCESS) {
201 error = thr.get_error_string();
202 break;
203 }
204 }
205 return error;
206}
207
208inline int Loader::get_error_code() const {
209 int errcode = 0;
210 for (auto &thr : m_ctxs) {
211 errcode = thr.get_error_code();
212 if (errcode != 0) {
213 break;
214 }
215 }
216 return errcode;
217}
218
221 for (auto &thr : m_ctxs) {
222 e = thr.get_error();
223 if (e != DB_SUCCESS) {
224 break;
225 }
226 }
227 return e;
228}
229
230} // namespace ddl_bulk
Multi Threaded Index Build (MTIB) using BUF_BLOCK_MEMORY and dedicated Bulk_flusher threads.
Definition: btr0mtib.h:682
Definition: btr0mtib.h:386
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:86
Definition: ddl0bulk.h:41
std::ostringstream m_sout
Definition: ddl0bulk.h:113
void free()
Free thread specific data.
Definition: ddl0bulk.cc:259
dberr_t m_err
Error code at thread level.
Definition: ddl0bulk.h:109
dberr_t fill_tuple(const row_prebuilt_t *prebuilt, const Rows_mysql &rows, size_t row_index)
Fill the tuple to set the column data.
Definition: ddl0bulk.cc:374
std::string get_error_string() const
Definition: ddl0bulk.h:61
dtuple_t * m_entry
Tuple for inserting row to cluster index.
Definition: ddl0bulk.h:100
dtuple_t * m_row
Tuple for converting input data to table row.
Definition: ddl0bulk.h:97
int get_error_code() const
Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
Definition: ddl0bulk.h:65
mem_heap_t * m_heap
Heap for allocating tuple memory.
Definition: ddl0bulk.h:94
int m_errcode
Definition: ddl0bulk.h:111
unsigned char m_rollptr_data[DATA_ROLL_PTR_LEN]
Column data for system column Roll pointer.
Definition: ddl0bulk.h:106
dberr_t load(const row_prebuilt_t *prebuilt, Btree_multi::Btree_load *sub_tree, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree for a specific thread.
Definition: ddl0bulk.cc:142
void init(const row_prebuilt_t *prebuilt)
Initialize thread specific data.
Definition: ddl0bulk.cc:44
dberr_t get_error() const
Definition: ddl0bulk.h:60
void fill_index_entry(const row_prebuilt_t *prebuilt)
Fill the cluster index entry from tuple data.
Definition: ddl0bulk.cc:352
void fill_system_columns(const row_prebuilt_t *prebuilt)
Fill system columns for index entry to be loaded.
Definition: ddl0bulk.cc:334
bool store_int_col(const Column_mysql &col, byte *data_ptr, size_t &data_len)
Store integer column in Innodb format.
Definition: ddl0bulk.cc:435
unsigned char m_trx_data[DATA_TRX_ID_LEN]
Column data for system column transaction ID.
Definition: ddl0bulk.h:103
Definition: ddl0bulk.h:39
std::vector< Btree_multi::Btree_load *, ut::allocator< Btree_multi::Btree_load * > > Btree_loads
Definition: ddl0bulk.h:147
Btree_loads m_sub_tree_loads
Sub-tree loading contexts.
Definition: ddl0bulk.h:188
void get_queue_size(size_t memory, size_t &flush_queue_size, bool &allocate_in_pages) const
Calculate the flush queue size to be used based on the available memory.
Definition: ddl0bulk.cc:69
dberr_t merge_subtrees(const row_prebuilt_t *prebuilt)
Merge the sub-trees to build the cluster index.
Definition: ddl0bulk.cc:499
dberr_t end(const row_prebuilt_t *prebuilt, bool is_error)
Finish bulk load operation, combining the sub-trees produced by concurrent threads.
Definition: ddl0bulk.cc:266
Loader(size_t num_threads)
Loader context constructor.
Definition: ddl0bulk.h:118
const char * get_index_name() const
Definition: ddl0bulk.h:161
dberr_t begin(const row_prebuilt_t *prebuilt, size_t data_size, size_t memory)
Prepare bulk loading by multiple threads.
Definition: ddl0bulk.cc:93
Btree_multi::Bulk_extent_allocator m_extent_allocator
Allocator to extend tablespace and allocate extents.
Definition: ddl0bulk.h:194
size_t m_num_threads
Number of threads for bulk loading.
Definition: ddl0bulk.h:182
dberr_t load(const row_prebuilt_t *prebuilt, size_t thread_index, const Rows_mysql &rows, Bulk_load::Stat_callbacks &wait_cbk)
Load rows to a sub-tree by a thread.
Definition: ddl0bulk.cc:130
const char * get_table_name() const
Definition: ddl0bulk.h:158
int get_error_code() const
Get the client error code (e.g. ER_LOAD_BULK_DATA_UNSORTED).
Definition: ddl0bulk.h:208
dict_table_t * m_table
Innodb dictionary table object.
Definition: ddl0bulk.h:191
dberr_t get_error() const
Definition: ddl0bulk.h:219
std::string get_error_string() const
Definition: ddl0bulk.h:197
std::vector< Thread_data, ut::allocator< Thread_data > > Thread_ctxs
Definition: ddl0bulk.h:148
Thread_ctxs m_ctxs
All thread specific data.
Definition: ddl0bulk.h:185
Allocator that allows std::* containers to manage their memory through ut::malloc* and ut::free libra...
Definition: ut0new.h:2181
constexpr size_t DATA_ROLL_PTR_LEN
Rollback data pointer type size in bytes.
Definition: data0type.h:191
constexpr size_t DATA_TRX_ID_LEN
Transaction ID type size in bytes.
Definition: data0type.h:185
dberr_t
Definition: db0err.h:39
@ DB_SUCCESS
Definition: db0err.h:43
void error(const char *format,...)
Definition: ddl0bulk.cc:42
Definition: aligned_atomic.h:44
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2870
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2874
Interface between Innobase row operations and MySQL.
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:324
Definition: bulk_data_service.h:61
id_name_t name
index name
Definition: dict0mem.h:1054
Data structure for a database table.
Definition: dict0mem.h:1909
table_name_t name
Table name.
Definition: dict0mem.h:1984
const dict_index_t * first_index() const
Definition: dict0mem.h:2467
Structure for an SQL data tuple of fields (logical record)
Definition: data0data.h:682
The info structure stored at the beginning of a heap block.
Definition: mem0mem.h:302
A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; th...
Definition: row0mysql.h:515
char * m_name
The name in internal representation.
Definition: dict0mem.h:472