MySQL 9.0.1
Source Code Documentation
bulk_data_service.h
Go to the documentation of this file.
1/* Copyright (c) 2022, 2024, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/**
25 @file
26 Services for bulk data conversion and load to SE.
27*/
28
29#pragma once
30
31#include <assert.h>
33#include <stddef.h>
34#include <cstring>
35#include <functional>
36#include <iomanip>
37#include <iostream>
38#include <limits>
39#include <memory>
40#include <sstream>
41#include <string>
42#include <vector>
43#include "field_types.h"
45
46class THD;
47struct TABLE;
48struct CHARSET_INFO;
49using Blob_context = void *;
50
51/** The blob reference size. Refer to lob::ref_t::SIZE or FIELD_REF_SIZE. */
52constexpr size_t BLOB_REF_SIZE = 20;
53
55 std::string filename;
56 size_t row_number;
57 std::string column_name;
58 std::string column_type;
59 std::string column_input_data;
60 std::string m_error_mesg{};
61 std::string m_table_name{};
62 size_t m_bytes;
63
64 std::ostream &print(std::ostream &out) const;
65};
66
68 std::ostream &out) const {
69 out << "[Bulk_load_error_location_details: filename=" << filename
70 << ", column_name=" << column_name << "]";
71 return out;
72}
73
74/** Overloading the global output operator to print objects of type
75Bulk_load_error_location_details.
76@param[in] out output stream
77@param[in] obj object to be printed
78@return given output stream. */
79inline std::ostream &operator<<(std::ostream &out,
81 return obj.print(out);
82}
83
85 /** Column data. */
86 const char *m_data_ptr{};
87
88 /** Column data length. */
89 size_t m_data_len{};
90
91 /** Mark the column to be null, by setting length to a special value. This is
92 only used for columns whose state is maintained across chunks
93 (aka fragmented columns). */
94 void set_null() {
95 assert(m_data_ptr == nullptr);
96 m_data_len = std::numeric_limits<size_t>::max();
97 }
98
99 /** Check if the column is null, by checking special value for length.
100 @return true if the column is null, false otherwise. */
101 bool is_null() const {
102 assert(m_data_len != std::numeric_limits<size_t>::max() ||
103 m_data_ptr == nullptr);
104 return m_data_len == std::numeric_limits<size_t>::max();
105 }
106
107 /** Check if the column data is stored externally. If the data is stored
108 externally, then the data length (m_data_len) would be equal to the
109 BLOB_REF_SIZE and the column data (m_data_ptr) will contain the lob
110 reference.
111 @return true if data is stored externally, false otherwise. */
112 bool is_ext() const {
113 assert(!m_is_ext || m_data_len == BLOB_REF_SIZE);
114 return m_is_ext;
115 }
116
117 /** Check if the column data is stored externally. It is called relaxed,
118 because the column length might not be equal to BLOB_REF_SIZE. Only to
119 be used while the blob is being processed by the CSV parser.
120 @return true if data is stored externally, false otherwise. */
121 bool is_ext_relaxed() const {
122 assert(!m_is_ext || m_data_len >= BLOB_REF_SIZE);
123 return m_is_ext;
124 }
125
126 /** Mark that the column data has been stored externally. */
127 void set_ext() {
128 assert(m_data_len == BLOB_REF_SIZE);
129 m_is_ext = true;
130 }
131
132 /** Initialize the members */
133 void init() {
134 m_data_ptr = nullptr;
135 m_data_len = 0;
136 m_is_ext = false;
137 }
138
139 /** Print this object into the given output stream.
140 @param[in] out output stream into which this object will be printed.
141 @return given output stream */
142 std::ostream &print(std::ostream &out) const;
143
144 std::string to_string() const;
145
146 private:
147 /** If true, the column data is stored externally. */
148 bool m_is_ext{false};
149};
150
151inline std::string Column_text::to_string() const {
153 sout << "[Column_text: len=" << m_data_len;
154 sout << ", val=";
155
156 if (m_data_ptr == nullptr) {
157 sout << "nullptr";
158 } else {
159 for (size_t i = 0; i < m_data_len; ++i) {
160 const char c = m_data_ptr[i];
161 if (isalnum(c)) {
162 sout << c;
163 } else {
164 sout << ".";
165 }
166 }
167 sout << "[hex=";
168 for (size_t i = 0; i < m_data_len; ++i) {
169 sout << std::setfill('0') << std::setw(2) << std::hex
170 << (int)*(&m_data_ptr[i]);
171 }
172 }
173 sout << "]";
174 return sout.str();
175}
176
177inline std::ostream &Column_text::print(std::ostream &out) const {
178 out << "[Column_text: this=" << static_cast<const void *>(this)
179 << ", m_data_ptr=" << static_cast<const void *>(m_data_ptr)
180 << ", m_data_len=" << m_data_len << ", m_is_ext=" << m_is_ext << "]";
181 return out;
182}
183
184/** Overloading the global output operator to print objects of type
185Column_text.
186@param[in] out output stream
187@param[in] obj object to be printed
188@return given output stream. */
189inline std::ostream &operator<<(std::ostream &out, const Column_text &obj) {
190 return obj.print(out);
191}
192
194 /** Column Data Type */
195 int16_t m_type{};
196
197 /** Column data length. */
198 uint16_t m_data_len{};
199
200 /** If column is NULL. */
201 bool m_is_null{false};
202
203 /** Column data */
204 char *m_data_ptr{};
205
206 /** Column data in integer format. Used only for specific datatype. */
207 uint64_t m_int_data;
208
209 std::string to_string() const;
210};
211
212inline std::string Column_mysql::to_string() const {
214 sout << "[Column_mysql: len=" << m_data_len;
215 sout << ", val=";
216
217 switch (m_type) {
218 case MYSQL_TYPE_LONG: {
219 sout << m_int_data;
220 } break;
221 default: {
222 for (size_t i = 0; i < m_data_len; ++i) {
223 const char c = m_data_ptr[i];
224 if (isalnum(c)) {
225 sout << c;
226 } else {
227 sout << ".";
228 }
229 }
230
231 } break;
232 }
233 if (m_type != MYSQL_TYPE_LONG) {
234 sout << "[hex=";
235 for (size_t i = 0; i < m_data_len; ++i) {
236 sout << std::setfill('0') << std::setw(2) << std::hex
237 << (int)*(&m_data_ptr[i]);
238 }
239 sout << "]";
240 }
241 return sout.str();
242}
243
244/** Implements the row and column memory management for parse and load
245operations. We try to pre-allocate the memory contiguously as much as we can
246to maximize the performance.
247
248@tparam Column_type Column_text when used in the CSV context, Column_mysql when
249used in the InnoDB context.
250*/
251template <typename Column_type>
253 public:
254 /** Create a new row bunch.
255 @param[in] n_cols number of columns */
256 Row_bunch(size_t n_cols) : m_num_columns(n_cols) {}
257
258 /** @return return number of rows in the bunch. */
259 size_t get_num_rows() const { return m_num_rows; }
260
261 /** @return return number of columns in each row. */
262 size_t get_num_cols() const { return m_num_columns; }
263
264 /** Process all columns, invoking callback for each.
265 @param[in] row_index index of the row
266 @param[in] cbk callback function
267 @return true if successful */
268 template <typename F>
269 bool process_columns(size_t row_index, F &&cbk) {
270 assert(row_index < m_num_rows);
271
272 auto row_offset = row_index * m_num_columns;
273 return process_columns_by_offset(row_offset, std::move(cbk));
274 }
275
276 template <typename F>
277 bool process_columns_by_offset(size_t row_offset, F &&cbk) {
278 assert(row_offset + m_num_columns <= m_columns.size());
279
280 for (size_t index = 0; index < m_num_columns; ++index) {
281 bool last_col = (index == m_num_columns - 1);
282 if (!cbk(m_columns[row_offset + index], last_col)) {
283 return false;
284 }
285 }
286 return true;
287 }
288
289 /** Get current row offset to access columns.
290 @param[in] row_index row index
291 @return row offset in column vector. */
292 size_t get_row_offset(size_t row_index) const {
293 assert(row_index < m_num_rows);
294 return row_index * m_num_columns;
295 }
296
297 /** Get next row offset from current row offset.
298 @param[in,out] offset row offset
299 @return true if there is a next row. */
300 size_t get_next_row_offset(size_t &offset) const {
301 offset += m_num_columns;
302 return (offset < m_columns.size());
303 }
304
305 /** Get column using row offset and column index.
306 @param[in] row_offset row offset in column vector
307 @param[in] col_index index of the column within row
308 @return column data */
309 Column_type &get_column(size_t row_offset, size_t col_index) {
310 assert(col_index < m_num_columns);
311 assert(row_offset + col_index < m_columns.size());
312 return m_columns[row_offset + col_index];
313 }
314
315 /** Get column using row index and column index.
316 @param[in] row_index index of the row in the bunch
317 @param[in] col_index index of the column within row
318 @return column data */
319 Column_type &get_col(size_t row_index, size_t col_index) {
320 return get_column(get_row_offset(row_index), col_index);
321 }
322
323 /** Get column using the column offset.
324 @param[in] col_offset column offset
325 @return column data */
326 Column_type &get_col(size_t col_offset) { return m_columns[col_offset]; }
327
328 /** Get constant column for reading using row offset and column index.
329 @param[in] row_offset row offset in column vector
330 @param[in] col_index index of the column within row
331 @return column data */
332 const Column_type &read_column(size_t row_offset, size_t col_index) const {
333 assert(col_index < m_num_columns);
334 assert(row_offset + col_index < m_columns.size());
335 return m_columns[row_offset + col_index];
336 }
337
338 /** Set the number of rows, growing the column storage if needed. The request
339 is rejected when it would exceed the maximum column storage limit.
340 @param[in] n_rows number of rows
341 @return true if successful, false if too many rows or columns. */
342 bool set_num_rows(size_t n_rows) {
343 /* Avoid any overflow during multiplication. */
344 if (n_rows > std::numeric_limits<uint32_t>::max() ||
345 m_num_columns > std::numeric_limits<uint32_t>::max()) {
346 return false;
347 }
348 auto total_cols = (uint64_t)n_rows * m_num_columns;
349
350 if (total_cols > S_MAX_TOTAL_COLS) {
351 return false;
352 }
353
354 m_num_rows = n_rows;
355
356 /* Extend columns if needed. */
357 if (m_columns.size() < total_cols) {
358 m_columns.resize(total_cols);
359 }
360 return true;
361 }
362
363 /** Limit allocation up to 600M columns. This number is rounded up from an
364 * estimate of the number of columns with the max chunk size (1024M). In the
365 * worst case we can have 2 bytes per column so a chunk can contain around
366 * 512M columns, and because of rows that spill over chunk boundaries we
367 * assume we can append a full additional row (which should have at most
368 * 4096 columns). Rounded up to 600M. */
369 const static size_t S_MAX_TOTAL_COLS = 600 * 1024 * 1024;
370
371 private:
372 /** All the columns. */
373 std::vector<Column_type> m_columns;
374
375 /** Number of rows. */
376 size_t m_num_rows{};
377
378 /** Number of columns in each row. */
380};
381
384
385/** Column metadata information. */
387 /** Data comparison method. */
388 enum class Compare {
389 /* Integer comparison */
391 /* Unsigned Integer comparison */
393 /* Binary comparison (memcmp) */
394 BINARY,
395 /* Need to callback to use appropriate comparison function in server. */
396 MYSQL
397 };
398
399 /** @return true if integer type. */
400 bool is_integer() const {
403 }
404
405 /** Based on the column data type check if it can be stored externally.
406 @return true if the column data can be stored externally
407 @return false if the column data cannot be stored externally */
408 bool can_be_stored_externally() const;
409
410 /** Field type. (@ref enum_field_types) */
412
413 /** If column could be NULL. */
415
416 /** If column is part of primary key. */
418
419 /** If the key is descending. */
421
422 /** If the key is prefix of the column. */
424
425 /** If it is fixed length type. */
427
428 /** If it is integer type. */
430
431 /** If it is unsigned integer type. */
433
434 /** Check the row header to find out if it is fixed length. For
435 character data type the row header indicates fixed length. */
437
438 /** If character column length can be kept in one byte. */
440
441 /** The length of column data if fixed. */
442 uint16_t m_fixed_len;
443
444 /** Maximum length of data in bytes. */
445 uint16_t m_max_len;
446
447 /** Index of column in row. */
448 uint16_t m_index;
449
450 /** Byte index in NULL bitmap. */
451 uint16_t m_null_byte;
452
453 /** BIT number in NULL bitmap. */
454 uint16_t m_null_bit;
455
456 /** Character set for char & varchar columns. */
457 const void *m_charset;
458
459 /** Field name */
460 std::string m_field_name;
461
462 /** Print this object into the given output stream.
463 @param[in] out output stream into which object will be printed
464 @return given output stream. */
465 std::ostream &print(std::ostream &out) const;
466};
467
469 switch (m_type) {
470 case MYSQL_TYPE_JSON:
474 case MYSQL_TYPE_BLOB:
477 return true;
478 }
479 default:
480 break;
481 }
482 return false;
483}
484
485inline std::ostream &Column_meta::print(std::ostream &out) const {
486 out << "[Column_meta: m_is_single_byte_len=" << m_is_single_byte_len
487 << ", m_is_fixed_len=" << m_is_fixed_len
488 << ", m_fixed_len=" << m_fixed_len << "]";
489 return out;
490}
491
492/** Overloading the global output operator to print objects of type
493Column_meta.
494@param[in] out output stream
495@param[in] obj object to be printed
496@return given output stream. */
497inline std::ostream &operator<<(std::ostream &out, const Column_meta &obj) {
498 return obj.print(out);
499}
500
501/** Row metadata */
502struct Row_meta {
503 /** Key type for fast comparison. */
504 enum class Key_type {
505 /* All keys are signed integers and ascending. */
507 /* All keys are integer. */
508 INT,
509 /* Keys are of any supported type. */
510 ANY
511 };
512 /** All columns in a row are arranged with key columns first. */
513 std::vector<Column_meta> m_columns;
514
515 /** All columns in a row arranged as per col_index. */
516 std::vector<const Column_meta *> m_columns_text_order;
517
518 /** Get the meta data of the column.
519 @param[in] col_index the index of the column as it appears in CSV file.
520 @return a reference to the column meta data.*/
521 const Column_meta &get_column_meta(size_t col_index) const {
522 assert(col_index < m_columns_text_order.size());
523 assert(col_index == m_columns_text_order[col_index]->m_index);
524 return *m_columns_text_order[col_index];
525 }
526
527 /** Total bitmap header length for the row. */
528 size_t m_bitmap_length = 0;
529
530 /** Total header length. */
531 size_t m_header_length = 0;
532
533 /** Length of the first key column. Helps to get the row pointer from first
534 key data pointer. */
535 size_t m_first_key_len = 0;
536
537 /** Key length in bytes for non-integer keys. This is required to estimate
538 the space required to save keys. */
539 size_t m_key_length = 0;
540
541 /** Number of columns used in primary key. */
542 uint32_t m_keys = 0;
543
544 /** Number of columns not used in primary Key. */
545 uint32_t m_non_keys = 0;
546
547 /** Key type for comparison. */
549
550 /** Total number of columns. A key could be on a column prefix.
551 m_columns <= m_keys + m_non_keys */
552 uint32_t m_num_columns = 0;
553
554 /** Approximate row length. */
556
557 /** Number of columns that can be stored externally. */
558 size_t m_n_blob_cols{0};
559};
560
561namespace Bulk_load {
562
565 public:
566 void KeyTooBig() const override;
567 void ValueTooBig() const override;
568 void TooDeep() const override;
569 void InvalidJson() const override;
570 void InternalError(const char *message) const override;
571 bool CheckStack() const override;
572
573 const char *c_str() const { return m_error.c_str(); }
574
575 std::string get_error() const { return m_error; }
576
577 private:
578 mutable std::string m_error{};
579};
580
582 m_error = "Key is too big";
583}
584
586 m_error = "Value is too big";
587}
588
590 m_error = "JSON document has more nesting levels than supported";
591}
593 m_error = "Invalid JSON value is encountered";
594}
596 const char *message [[maybe_unused]]) const {
597 m_error = message;
598 m_error += " (Internal Error)";
599}
600
602 return false;
603}
604
605/** Callbacks for collecting time statistics */
607 /* Operation begin. */
608 std::function<void()> m_fn_begin;
609 /* Operation end. */
610 std::function<void()> m_fn_end;
611};
612
613} // namespace Bulk_load
614
615/** Bulk Data conversion. */
616BEGIN_SERVICE_DEFINITION(bulk_data_convert)
617/** Convert rows from text format to MySQL column format. Convert as many
618rows as possible consuming the data buffer starting from next_index. On
619output next_index is the next row index that is not yet consumed. If it
620matches the size of input text_rows, then all rows are consumed.
621@param[in,out] thd session THD
622@param[in] table MySQL TABLE
623@param[in] text_rows rows with column in text
624@param[in,out] next_index next_index in text_rows to be processed
625@param[in,out] buffer data buffer for keeping sql row data
626@param[in,out] buffer_length length of the data buffer
627@param[in] charset input row data character set
628@param[in] metadata row metadata
629@param[out] sql_rows rows with column in MySQL column format
630@return error code. */
632 (THD * thd, const TABLE *table, const Rows_text &text_rows,
633 size_t &next_index, char *buffer, size_t &buffer_length,
634 const CHARSET_INFO *charset, const Row_meta &metadata,
635 Rows_mysql &sql_rows,
637
638/** Convert row to MySQL column format from raw form
639@param[in,out] buffer input raw data buffer
640@param[in] buffer_length buffer length
641@param[in] metadata row metadata
642@param[in] start_index start row index in row bunch
643@param[out] consumed_length length of buffer consumed
644@param[in,out] sql_rows row bunch to fill data
645@return error code. */
647 (char *buffer, size_t buffer_length, const Row_meta &metadata,
648 size_t start_index, size_t &consumed_length,
649 Rows_mysql &sql_rows));
650
651/** Convert row to MySQL column format using the key
652@param[in] metadata row metadata
653@param[in] sql_keys Key bunch
654@param[in] key_offset offset for the key
655@param[in,out] sql_rows row bunch to fill data
656@param[in] sql_index index of the row to be filled
657@return error code. */
659 (const Row_meta &metadata, const Rows_mysql &sql_keys,
660 size_t key_offset, Rows_mysql &sql_rows, size_t sql_index));
661
662/** Check if session is interrupted.
663@param[in,out] thd session THD
664@return true if connection or statement is killed. */
666
667/** Compare two key columns
668@param[in] key1 first key
669@param[in] key2 second key
670@param[in] col_meta column meta information
671@return positive, 0, negative, if key1 is greater than, equal to, less than key2 */
673 (const Column_mysql &key1, const Column_mysql &key2,
674 const Column_meta &col_meta));
675
676/** Get Table row metadata.
677@param[in,out] thd session THD
678@param[in] table MySQL TABLE
679@param[in] have_key include Primary Key metadata
680@param[out] metadata Metadata
681@return true if successful. */
683 (THD * thd, const TABLE *table, bool have_key,
684 Row_meta &metadata));
685
686END_SERVICE_DEFINITION(bulk_data_convert)
687
688/** Column metadata information. */
689/* Bulk data load to SE. */
691/** Begin Loading bulk data to SE.
692@param[in,out] thd session THD
693@param[in] table MySQL TABLE
694@param[in] data_size total data size to load
695@param[in] memory SE memory to be used
696@param[in] num_threads Number of concurrent threads
697@return SE bulk load context or nullptr in case of an error. */
698DECLARE_METHOD(void *, begin,
699 (THD * thd, const TABLE *table, size_t data_size, size_t memory,
700 size_t num_threads));
701
702/** Load a set of rows to SE table by one thread.
703@param[in,out] thd session THD
704@param[in,out] ctx SE load context returned by begin()
705@param[in] table MySQL TABLE
706@param[in] sql_rows row data to load
707@param[in] thread current thread number
708@param[in] wait_cbks wait stat callbacks
709@return true if successful. */
710DECLARE_METHOD(bool, load,
711 (THD * thd, void *ctx, const TABLE *table,
712 const Rows_mysql &sql_rows, size_t thread,
713 Bulk_load::Stat_callbacks &wait_cbks));
714
715/** Create a blob context object to insert a blob.
716@param[in,out] thd session THD
717@param[in,out] load_ctx SE load context returned by begin()
718@param[in] table MySQL TABLE
719@param[out] blob_ctx a blob context object to insert a blob.
720@param[out] blobref buffer to hold blob reference
721@param[in] thread current thread number
722@return true if successful. */
724 (THD * thd, void *load_ctx, const TABLE *table,
725 Blob_context &blob_ctx, unsigned char *blobref, size_t thread));
726
727/** Write data into a blob
728@param[in,out] thd session THD
729@param[in,out] load_ctx SE load context returned by begin()
730@param[in] table MySQL TABLE
731@param[in] blob_ctx a blob context object to insert a blob.
732@param[out] blobref buffer to hold blob reference
733@param[in] thread current thread number
734@param[in] data blob data to be written
735@param[in] data_len length of blob data to be written (in bytes)
736@return true if successful. */
738 (THD * thd, void *load_ctx, const TABLE *table,
739 Blob_context blob_ctx, unsigned char *blobref, size_t thread,
740 const unsigned char *data, size_t data_len));
741
742/** Close the blob
743@param[in,out] thd session THD
744@param[in,out] load_ctx SE load context returned by begin()
745@param[in] table MySQL TABLE
746@param[in] blob_ctx a blob context object to insert a blob.
747@param[out] blobref buffer to hold blob reference
748@param[in] thread current thread number
749@return true if successful. */
751 (THD * thd, void *load_ctx, const TABLE *table,
752 Blob_context blob_ctx, unsigned char *blobref, size_t thread));
753
754/** End Loading bulk data to SE.
755
756Called at the end of bulk load execution, even if begin or load calls failed.
757
758@param[in,out] thd session THD
759@param[in,out] ctx SE load context
760@param[in] table MySQL TABLE
761@param[in] error true, if exiting after error
762@return true if successful. */
763DECLARE_METHOD(bool, end,
764 (THD * thd, void *ctx, const TABLE *table, bool error));
765
766/** Check if a table is supported by the bulk load implementation.
767@param[in,out] thd session THD
768@param[in] table MySQL TABLE
769@return true if table is supported. */
771
772/** Get available buffer pool memory for bulk load operations.
773@param[in,out] thd session THD
774@param[in] table MySQL TABLE
775@return buffer pool memory available for bulk load. */
777
constexpr size_t BLOB_REF_SIZE
The blob reference size.
Definition: bulk_data_service.h:52
void * Blob_context
Definition: bulk_data_service.h:49
std::ostream & operator<<(std::ostream &out, const Bulk_load_error_location_details &obj)
Overloading the global output operator to print objects of type Bulk_load_error_location_details.
Definition: bulk_data_service.h:79
Definition: bulk_data_service.h:564
const char * c_str() const
Definition: bulk_data_service.h:573
void KeyTooBig() const override
Called when a JSON object contains a member with a name that is longer than supported by the JSON bin...
Definition: bulk_data_service.h:581
std::string get_error() const
Definition: bulk_data_service.h:575
std::string m_error
Definition: bulk_data_service.h:578
void InternalError(const char *message) const override
Called when an internal error occurs.
Definition: bulk_data_service.h:595
void ValueTooBig() const override
Called when a JSON document is too big to be stored in the JSON binary format.
Definition: bulk_data_service.h:585
void TooDeep() const override
Called when a JSON document has more nesting levels than supported.
Definition: bulk_data_service.h:589
void InvalidJson() const override
Called when an invalid JSON value is encountered.
Definition: bulk_data_service.h:592
bool CheckStack() const override
Check if the stack is about to be exhausted, and report the error.
Definition: bulk_data_service.h:601
Error handler for the functions that serialize a JSON value in the JSON binary storage format.
Definition: json_error_handler.h:49
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:252
bool set_num_rows(size_t n_rows)
Set the number of rows.
Definition: bulk_data_service.h:342
std::vector< Column_type > m_columns
All the columns.
Definition: bulk_data_service.h:373
size_t get_next_row_offset(size_t &offset) const
Get next row offset from current row offset.
Definition: bulk_data_service.h:300
Column_type & get_col(size_t col_offset)
Get column using the column offset.
Definition: bulk_data_service.h:326
bool process_columns(size_t row_index, F &&cbk)
Process all columns, invoking callback for each.
Definition: bulk_data_service.h:269
bool process_columns_by_offset(size_t row_offset, F &&cbk)
Definition: bulk_data_service.h:277
size_t get_num_cols() const
Definition: bulk_data_service.h:262
size_t m_num_rows
Number of rows.
Definition: bulk_data_service.h:376
size_t get_row_offset(size_t row_index) const
Get current row offset to access columns.
Definition: bulk_data_service.h:292
size_t get_num_rows() const
Definition: bulk_data_service.h:259
const Column_type & read_column(size_t row_offset, size_t col_index) const
Get constant column for reading using row offset and column index.
Definition: bulk_data_service.h:332
Column_type & get_col(size_t row_index, size_t col_index)
Get column using row index and column index.
Definition: bulk_data_service.h:319
Row_bunch(size_t n_cols)
Create a new row bunch.
Definition: bulk_data_service.h:256
static const size_t S_MAX_TOTAL_COLS
Limit allocation up to 600M columns.
Definition: bulk_data_service.h:369
size_t m_num_columns
Number of columns in each row.
Definition: bulk_data_service.h:379
Column_type & get_column(size_t row_offset, size_t col_index)
Get column using row offset and column index.
Definition: bulk_data_service.h:309
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
This file contains the field type.
enum_field_types
Column types for MySQL Note: Keep include/mysql/components/services/bits/stored_program_bits....
Definition: field_types.h:55
@ MYSQL_TYPE_VARCHAR
Definition: field_types.h:71
@ MYSQL_TYPE_LONG_BLOB
Definition: field_types.h:86
@ MYSQL_TYPE_BLOB
Definition: field_types.h:87
@ MYSQL_TYPE_JSON
Definition: field_types.h:80
@ MYSQL_TYPE_TINY_BLOB
Definition: field_types.h:84
@ MYSQL_TYPE_LONG
Definition: field_types.h:59
@ MYSQL_TYPE_GEOMETRY
Definition: field_types.h:90
@ MYSQL_TYPE_MEDIUM_BLOB
Definition: field_types.h:85
static int compare_keys(PFS_table_share *pfs, const TABLE_SHARE *share)
Definition: pfs_instr_class.cc:2171
static uint16 key1[1001]
Definition: hp_test2.cc:50
void error(const char *format,...)
int mysql_format_from_raw(char *buffer, size_t buffer_length, const Row_meta &metadata, size_t start_index, size_t &consumed_length, Rows_mysql &sql_rows) noexcept
Definition: bulk_data_service.cc:1710
int mysql_format(THD *thd, const TABLE *table, const Rows_text &text_rows, size_t &next_index, char *buffer, size_t &buffer_length, const CHARSET_INFO *charset, const Row_meta &metadata, Rows_mysql &sql_rows, Bulk_load_error_location_details &error_details) noexcept
Definition: bulk_data_service.cc:1739
bool get_row_metadata(THD *, const TABLE *table, bool have_key, Row_meta &metadata) noexcept
Definition: bulk_data_service.cc:1964
int mysql_format_using_key(const Row_meta &metadata, const Rows_mysql &sql_keys, size_t key_offset, Rows_mysql &sql_rows, size_t sql_index) noexcept
Definition: bulk_data_service.cc:1689
bool is_killed(THD *thd) noexcept
Definition: bulk_data_service.cc:1791
bool open_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context &blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2147
size_t get_se_memory_size(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2255
bool write_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread, const unsigned char *data, size_t data_len) noexcept
Definition: bulk_data_service.cc:2157
bool close_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2165
bool is_table_supported(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2259
Definition: bulk_data_service.h:561
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
const std::string charset("charset")
bool load(THD *, const dd::String_type &fname, dd::String_type *buf)
Read an sdi file from disk and store in a buffer.
Definition: sdi_file.cc:308
Definition: aligned_atomic.h:44
const char * begin(const char *const c)
Definition: base64.h:44
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:418
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2871
#define DECLARE_METHOD(retval, name, args)
Declares a method as a part of the Service definition.
Definition: service.h:103
#define END_SERVICE_DEFINITION(name)
A macro to end the last Service definition started with the BEGIN_SERVICE_DEFINITION macro.
Definition: service.h:91
#define BEGIN_SERVICE_DEFINITION(name)
Declares a new Service.
Definition: service.h:86
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:606
std::function< void()> m_fn_begin
Definition: bulk_data_service.h:608
std::function< void()> m_fn_end
Definition: bulk_data_service.h:610
Definition: bulk_data_service.h:54
std::string filename
Definition: bulk_data_service.h:55
std::string m_table_name
Definition: bulk_data_service.h:61
size_t m_bytes
Definition: bulk_data_service.h:62
size_t row_number
Definition: bulk_data_service.h:56
std::string column_input_data
Definition: bulk_data_service.h:59
std::string column_name
Definition: bulk_data_service.h:57
std::string m_error_mesg
Definition: bulk_data_service.h:60
std::ostream & print(std::ostream &out) const
Definition: bulk_data_service.h:67
std::string column_type
Definition: bulk_data_service.h:58
Definition: m_ctype.h:421
Column metadata information.
Definition: bulk_data_service.h:386
bool m_is_prefix_key
If the key is prefix of the column.
Definition: bulk_data_service.h:423
enum_field_types m_type
Field type.
Definition: bulk_data_service.h:411
std::string m_field_name
Field name.
Definition: bulk_data_service.h:460
uint16_t m_index
Index of column in row.
Definition: bulk_data_service.h:448
bool m_is_single_byte_len
If character column length can be kept in one byte.
Definition: bulk_data_service.h:439
uint16_t m_null_byte
Byte index in NULL bitmap.
Definition: bulk_data_service.h:451
bool m_is_desc_key
If the key is descending.
Definition: bulk_data_service.h:420
Compare m_compare
Data comparison method used for this column.
Definition: bulk_data_service.h:429
uint16_t m_fixed_len
The length of column data if fixed.
Definition: bulk_data_service.h:442
bool is_integer() const
Definition: bulk_data_service.h:400
Compare
Data comparison method.
Definition: bulk_data_service.h:388
uint16_t m_max_len
Maximum length of data in bytes.
Definition: bulk_data_service.h:445
bool m_is_fixed_len
If it is fixed length type.
Definition: bulk_data_service.h:426
bool m_is_key
If column is part of primary key.
Definition: bulk_data_service.h:417
uint16_t m_null_bit
Bit number in NULL bitmap.
Definition: bulk_data_service.h:454
bool can_be_stored_externally() const
Based on the column data type check if it can be stored externally.
Definition: bulk_data_service.h:468
bool m_fixed_len_if_set_in_row
Check the row header to find out if it is fixed length.
Definition: bulk_data_service.h:436
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:485
bool m_is_nullable
If column could be NULL.
Definition: bulk_data_service.h:414
bool m_is_unsigned
If it is unsigned integer type.
Definition: bulk_data_service.h:432
const void * m_charset
Character set for char & varchar columns.
Definition: bulk_data_service.h:457
Definition: bulk_data_service.h:193
uint64_t m_int_data
Column data in integer format.
Definition: bulk_data_service.h:207
std::string to_string() const
Definition: bulk_data_service.h:212
bool m_is_null
If column is NULL.
Definition: bulk_data_service.h:201
char * m_data_ptr
Column data.
Definition: bulk_data_service.h:204
int16_t m_type
Column Data Type.
Definition: bulk_data_service.h:195
uint16_t m_data_len
Column data length.
Definition: bulk_data_service.h:198
Definition: bulk_data_service.h:84
bool is_null() const
Check if the column is null, by checking special value for length.
Definition: bulk_data_service.h:101
bool is_ext_relaxed() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:121
bool m_is_ext
If true, the column data is stored externally.
Definition: bulk_data_service.h:148
void set_null()
Mark the column to be null, by setting length to a special value.
Definition: bulk_data_service.h:94
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:177
const char * m_data_ptr
Column data.
Definition: bulk_data_service.h:86
void init()
Initialize the members.
Definition: bulk_data_service.h:133
void set_ext()
Mark that the column data has been stored externally.
Definition: bulk_data_service.h:127
bool is_ext() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:112
size_t m_data_len
Column data length.
Definition: bulk_data_service.h:89
std::string to_string() const
Definition: bulk_data_service.h:151
Definition: mysql.h:300
Row metadata.
Definition: bulk_data_service.h:502
const Column_meta & get_column_meta(size_t col_index) const
Get the meta data of the column.
Definition: bulk_data_service.h:521
size_t m_n_blob_cols
Number of columns that can be stored externally.
Definition: bulk_data_service.h:558
size_t m_bitmap_length
Total bitmap header length for the row.
Definition: bulk_data_service.h:528
size_t m_first_key_len
Length of the first key column.
Definition: bulk_data_service.h:535
size_t m_header_length
Total header length.
Definition: bulk_data_service.h:531
Key_type
Key type for fast comparison.
Definition: bulk_data_service.h:504
uint32_t m_non_keys
Number of columns not used in primary Key.
Definition: bulk_data_service.h:545
uint32_t m_num_columns
Total number of columns.
Definition: bulk_data_service.h:552
uint32_t m_keys
Number of columns used in primary key.
Definition: bulk_data_service.h:542
size_t m_key_length
Key length in bytes for non-integer keys.
Definition: bulk_data_service.h:539
std::vector< Column_meta > m_columns
All columns in a row are arranged with key columns first.
Definition: bulk_data_service.h:513
std::vector< const Column_meta * > m_columns_text_order
All columns in a row arranged as per col_index.
Definition: bulk_data_service.h:516
Key_type m_key_type
Key type for comparison.
Definition: bulk_data_service.h:548
size_t m_approx_row_len
Approximate row length.
Definition: bulk_data_service.h:555
Definition: table.h:1407