MySQL 9.5.0
Source Code Documentation
bulk_data_service.h
Go to the documentation of this file.
1/* Copyright (c) 2022, 2025, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/**
25 @file
26 Services for bulk data conversion and load to SE.
27*/
28
29#pragma once
30
31#include <assert.h>
33#include <stddef.h>
34#include <cstdint>
35#include <cstring>
36#include <functional>
37#include <iomanip>
38#include <iostream>
39#include <limits>
40#include <memory>
41#include <sstream>
42#include <string>
43#include <vector>
44#include "field_types.h"
46
47class THD;
48struct TABLE;
49struct CHARSET_INFO;
50using Blob_context = void *;
51
52/** The blob reference size. Refer to lob::ref_t::SIZE or FIELD_REF_SIZE. */
53constexpr size_t BLOB_REF_SIZE = 20;
54
56 std::string filename;
57 size_t row_number;
58 std::string column_name;
59 std::string column_type;
60 std::string column_input_data;
61 std::string m_error_mesg{};
62 std::string m_table_name{};
63 size_t m_bytes;
65
66 std::ostream &print(std::ostream &out) const;
67};
68
70 std::ostream &out) const {
71 out << "[Bulk_load_error_location_details: filename=" << filename
72 << ", column_name=" << column_name << "]";
73 return out;
74}
75
76/** Overloading the global output operator to print objects of type
77Bulk_load_error_location_details.
78@param[in] out output stream
79@param[in] obj object to be printed
80@return given output stream. */
81inline std::ostream &operator<<(std::ostream &out,
83 return obj.print(out);
84}
85
87 /** Column data. */
88 const char *m_data_ptr{};
89
90 /** Column data length. */
91 size_t m_data_len{};
92
93 /** Check if it is DB_ROW_ID column based on the value it contains.
94 @return true if it is DB_ROW_ID column, false otherwise */
95 bool is_row_id() const { return m_row_id != UINT64_MAX; }
96
97 /** The generated DB_ROW_ID value */
98 uint64_t m_row_id{UINT64_MAX};
99
100 /** Mark the column to be null, by setting length to a special value. This is
101 only used for columns whose state is maintained across chunks
102 (aka fragmented columns). */
103 void set_null() {
104 assert(m_data_ptr == nullptr);
106 }
107
108 /** Check if the column is null, by checking special value for length.
109 @return true if the column is null, false otherwise. */
110 bool is_null() const {
112 m_data_ptr == nullptr);
114 }
115
116 /** Check if the column data is stored externally. If the data is stored
117 externally, then the data length (m_data_len) would be equal to the
118 BLOB_REF_SIZE and the column data (m_data_ptr) will contain the lob
119 reference.
120 @return true if data is stored externally, false otherwise. */
121 bool is_ext() const {
122 assert(!m_is_ext || m_data_len == BLOB_REF_SIZE);
123 return m_is_ext;
124 }
125
126 /** Check if the column data is stored externally. It is called relaxed,
127 because the column length might not be equal to BLOB_REF_SIZE. Only to
128 be used while the blob is being processed by the CSV parser.
129 @return true if data is stored externally, false otherwise. */
130 bool is_ext_relaxed() const {
131 assert(!m_is_ext || m_data_len >= BLOB_REF_SIZE);
132 return m_is_ext;
133 }
134
135 /** Mark that the column data has been stored externally. */
136 void set_ext() {
137 assert(m_data_len == BLOB_REF_SIZE);
138 m_is_ext = true;
139 }
140
141 /** Initialize the members */
142 void init() {
143 m_data_ptr = nullptr;
144 m_data_len = 0;
145 m_is_ext = false;
146 m_row_id = UINT64_MAX;
147 }
148
149 /** Print this object into the given output stream.
150 @param[in] out output stream into which this object will be printed.
151 @return given output stream */
152 std::ostream &print(std::ostream &out) const;
153
154 std::string to_string() const;
155
156 private:
157 /** If true, the column data is stored externally. */
158 bool m_is_ext{false};
159};
160
161inline std::string Column_text::to_string() const {
163 sout << "[Column_text: len=" << m_data_len;
164 sout << ", val=";
165
166 if (m_data_ptr == nullptr) {
167 sout << "nullptr";
168 } else {
169 for (size_t i = 0; i < m_data_len; ++i) {
170 const char c = m_data_ptr[i];
171 if (isalnum(c)) {
172 sout << c;
173 } else {
174 sout << ".";
175 }
176 }
177 sout << "[hex=";
178 for (size_t i = 0; i < m_data_len; ++i) {
179 sout << std::setfill('0') << std::setw(2) << std::hex
180 << (int)*(&m_data_ptr[i]);
181 }
182 }
183 sout << "]";
184 return sout.str();
185}
186
187inline std::ostream &Column_text::print(std::ostream &out) const {
188 out << "[Column_text: this=" << static_cast<const void *>(this)
189 << ", m_data_ptr=" << static_cast<const void *>(m_data_ptr)
190 << ", m_data_len=" << m_data_len << ", m_is_ext=" << m_is_ext << "]";
191 return out;
192}
193
194/** Overloading the global output operator to print objects of type
195Column_text.
196@param[in] out output stream
197@param[in] obj object to be printed
198@return given output stream. */
199inline std::ostream &operator<<(std::ostream &out, const Column_text &obj) {
200 return obj.print(out);
201}
202
203struct Row_meta;
204
206 /** Column Data Type */
207 int16_t m_type{};
208
209 /** Column data length. */
210 uint16_t m_data_len{};
211
212 /** If column is NULL. */
213 bool m_is_null{false};
214
215 char *get_data() const { return m_is_null ? nullptr : m_data_ptr; }
216
217 void set_data(char *ptr) { m_data_ptr = ptr; }
218
219 /** Save the beginning of the row pointer in this object. This should be
220 called only when the column is null.
221 @param[in] row_begin pointer to beginning of row.*/
222 void row(char *row_begin) {
223 assert(m_is_null);
224 m_data_len = 0;
225 m_data_ptr = row_begin;
226 }
227
228 /** Get the pointer to the beginning of row. This is valid only if the
229 column is null. This should be called on the first column of the row. There
230 is no need to call this on other columns.
231 @param[in] row_meta meta data information about the row
232 @param[in] col_index Index of the first column which is 0.
233 @return pointer to row beginning. */
234 char *get_row_begin(const Row_meta &row_meta,
235 size_t col_index [[maybe_unused]]) const;
236
237 /** Column data in integer format. Used only for specific datatype. */
238 uint64_t m_int_data;
239
240 void init() {
241 m_type = 0;
242 m_data_len = 0;
243 m_is_null = false;
244 m_data_ptr = nullptr;
245 m_int_data = 0;
246 }
247
248 std::string to_string() const;
249
250 private:
251 /** Column data or row begin. There is a need to fetch the beginning of
252 the row from the vector of Column_mysql. But in the case of secondary
253 indexes, all the keys could be null and it becomes impossible to obtain
254 the pointer to beginning of the row. To solve this problem, I am re-using
255 this pointer to hold the row begin when the column is null. So it becomes
256 important to make use of m_is_null to check if the column is null. It is NOT
257 correct to check this pointer against nullptr to confirm if column is null.*/
258 char *m_data_ptr{nullptr};
259};
260
261inline std::string Column_mysql::to_string() const {
263 sout << "[Column_mysql: type=" << m_type << ", len=" << m_data_len
264 << ", m_int_data=" << m_int_data;
265 sout << ", val=";
266
267 switch (m_type) {
268 case MYSQL_TYPE_LONG: {
269 sout << m_int_data;
270 } break;
271 default: {
272 for (size_t i = 0; i < m_data_len; ++i) {
273 const char c = m_data_ptr[i];
274 if (isalnum(c)) {
275 sout << c;
276 } else {
277 sout << ".";
278 }
279 }
280
281 } break;
282 }
283 if (m_type != MYSQL_TYPE_LONG) {
284 sout << "[hex=";
285 for (size_t i = 0; i < m_data_len; ++i) {
286 sout << std::setfill('0') << std::setw(2) << std::hex
287 << (int)*(&m_data_ptr[i]);
288 }
289 sout << "]";
290 }
291 return sout.str();
292}
293
294/** Implements the row and column memory management for parse and load
295operations. We try to pre-allocate the memory contiguously as much as we can
296to maximize the performance.
297
298@tparam Column_type Column_text when used in the CSV context, Column_mysql when
299used in the InnoDB context.
300*/
301template <typename Column_type>
303 public:
304 /** Create a new row bunch.
305 @param[in] n_cols number of columns */
306 Row_bunch(size_t n_cols) : m_num_columns(n_cols) {}
307
308 /** @return return number of rows in the bunch. */
309 size_t get_num_rows() const { return m_num_rows; }
310
311 /** @return return number of columns in each row. */
312 size_t get_num_cols() const { return m_num_columns; }
313
314 /** Process all columns, invoking callback for each.
315 @param[in] row_index index of the row
316 @param[in] cbk callback function
317 @return true if successful */
318 template <typename F>
319 bool process_columns(size_t row_index, F &&cbk) {
320 assert(row_index < m_num_rows);
321
322 auto row_offset = row_index * m_num_columns;
323 return process_columns_by_offset(row_offset, std::move(cbk));
324 }
325
326 template <typename F>
327 bool process_columns_by_offset(size_t row_offset, F &&cbk) {
328 assert(row_offset + m_num_columns <= m_columns.size());
329
330 for (size_t index = 0; index < m_num_columns; ++index) {
331 bool last_col = (index == m_num_columns - 1);
332 if (!cbk(m_columns[row_offset + index], last_col)) {
333 return false;
334 }
335 }
336 return true;
337 }
338
339 void reset() {
340 for (auto &col : m_columns) {
341 col.init();
342 }
343 }
344
345 /** Get current row offset to access columns.
346 @param[in] row_index row index
347 @return row offset in column vector. */
348 size_t get_row_offset(size_t row_index) const {
349 assert(row_index < m_num_rows);
350 return row_index * m_num_columns;
351 }
352
353 /** Get next row offset from current row offset.
354 @param[in,out] offset row offset
355 @return true if there is a next row. */
356 size_t get_next_row_offset(size_t &offset) const {
357 offset += m_num_columns;
358 return (offset < m_columns.size());
359 }
360
361 /** Get column using row offset and column index.
362 @param[in] row_offset row offset in column vector
363 @param[in] col_index index of the column within row
364 @return column data */
365 Column_type &get_column(size_t row_offset, size_t col_index) {
366 assert(col_index < m_num_columns);
367 assert(row_offset + col_index < m_columns.size());
368 return m_columns[row_offset + col_index];
369 }
370
371 /** Get column using row index and column index.
372 @param[in] row_index index of the row in the bunch
373 @param[in] col_index index of the column within row
374 @return column data */
375 Column_type &get_col(size_t row_index, size_t col_index) {
376 return get_column(get_row_offset(row_index), col_index);
377 }
378
379 /** Get column using the column offset.
380 @param[in] col_offset column offset
381 @return column data */
382 Column_type &get_col(size_t col_offset) { return m_columns[col_offset]; }
383
384 /** Get constant column for reading using row offset and column index.
385 @param[in] row_offset row offset in column vector
386 @param[in] col_index index of the column within row
387 @return column data */
388 const Column_type &read_column(size_t row_offset, size_t col_index) const {
389 assert(col_index < m_num_columns);
390 assert(row_offset + col_index < m_columns.size());
391 return m_columns[row_offset + col_index];
392 }
393
394 /** Set the number of rows, extending the column storage if needed. The
395 request is rejected if it would exceed the maximum column storage limit.
396 @param[in] n_rows number of rows
397 @return true if successful, false if too many rows or columns. */
398 bool set_num_rows(size_t n_rows) {
399 /* Avoid any overflow during multiplication. */
400 if (n_rows > std::numeric_limits<uint32_t>::max() ||
402 return false;
403 }
404 auto total_cols = (uint64_t)n_rows * m_num_columns;
405
406 if (total_cols > S_MAX_TOTAL_COLS) {
407 return false;
408 }
409
410 m_num_rows = n_rows;
411
412 /* Extend columns if needed. */
413 if (m_columns.size() < total_cols) {
414 m_columns.resize(total_cols);
415 }
416 return true;
417 }
418
419 /** Limit allocation up to 600M columns. This number is rounded up from an
420 * estimate of the number of columns with the max chunk size (1024M). In the
421 * worst case we can have 2 bytes per column so a chunk can contain around
422 * 512M columns, and because of rows that spill over chunk boundaries we
423 * assume we can append a full additional row (which should have at most
424 * 4096 columns). Rounded up to 600M. */
425 const static size_t S_MAX_TOTAL_COLS = 600 * 1024 * 1024;
426
427 private:
428 /** All the columns. */
429 std::vector<Column_type> m_columns;
430
431 /** Number of rows. */
432 size_t m_num_rows{};
433
434 /** Number of columns in each row. */
436};
437
440
441/** Column metadata information. */
443 /** Data comparison method. */
444 enum class Compare {
445 /* Integer comparison */
447 /* Unsigned Integer comparison */
449 /* Binary comparison (memcmp) */
450 BINARY,
451 /* Need to callback to use appropriate comparison function in server. */
452 MYSQL
453 };
454
455 std::string get_compare_string() const {
456 switch (m_compare) {
458 return "INTEGER_SIGNED";
460 return "INTEGER_UNSIGNED";
461 case Compare::BINARY:
462 return "BINARY";
463 case Compare::MYSQL:
464 return "MYSQL";
465 }
466 assert(0);
467 return "INVALID";
468 }
469
470 /** @return true if integer type. */
471 bool is_integer() const {
474 }
475
476 /** Based on the column data type check if it can be stored externally.
477 @return true if the column data can be stored externally
478 @return false if the column data cannot be stored externally */
479 bool can_be_stored_externally() const;
480
481 /** true if this column is part of secondary index. */
482 bool m_is_part_of_sk{false};
483
484 /** Field type. (@ref enum_field_types) */
486
487 /** If column could be NULL. */
488 bool m_is_nullable{false};
489
490 /** true if column belongs to primary index (key or non-key) */
491 bool m_is_pk{false};
492
493 /** true if column is a key for primary or secondary index. */
494 bool m_is_key{false};
495
496 /** If the key is descending. */
497 bool m_is_desc_key{false};
498
499 /** If the key is prefix of the column. */
500 bool m_is_prefix_key{false};
501
502 /** If it is fixed length type. */
503 bool m_is_fixed_len{false};
504
505 /** If it is integer type. */
507
508 /** If it is unsigned integer type. */
509 bool m_is_unsigned{false};
510
511 /** Check the row header to find out if it is fixed length. For
512 character data type the row header indicates fixed length. */
514
515 /** If character column length can be kept in one byte. */
517
518 /** The length of column data if fixed. */
519 uint16_t m_fixed_len;
520
521 /** Maximum length of data in bytes. */
522 uint16_t m_max_len;
523
524 /** Index of column in row. */
525 uint16_t m_index;
526
527 /** Position of column in table. Refer to Field::field_index() */
529
530 /** Byte index in NULL bitmap. */
531 uint16_t m_null_byte;
532
533 /** BIT number in NULL bitmap. */
534 uint16_t m_null_bit;
535
536 /** Character set for char & varchar columns. */
537 const void *m_charset;
538
539 /** Field name */
540 std::string m_field_name;
541
542 /** Get a string representation of Column_meta object. Useful only for
543 debugging purposes.
544 @see Column_meta
545 @return string representation of this object. */
546 std::string to_string() const;
547
548 /** Print this object into the given output stream.
549 @param[in] out output stream into which object will be printed
550 @return given output stream. */
551 std::ostream &print(std::ostream &out) const;
552
553 /** Get the data type of the column as a string.
554 @return data type of the column as a string. */
555 std::string get_type_string() const;
556};
557
558inline std::string Column_meta::get_type_string() const {
559 switch (m_type) {
561 return "decimal";
562 case MYSQL_TYPE_TINY:
563 return "tiny";
564 case MYSQL_TYPE_SHORT:
565 return "short";
566 case MYSQL_TYPE_LONG:
567 return "long";
568 case MYSQL_TYPE_FLOAT:
569 return "float";
571 return "double";
572 case MYSQL_TYPE_NULL:
573 return "null";
575 return "timestamp";
577 return "longlong";
578 case MYSQL_TYPE_INT24:
579 return "int";
580 case MYSQL_TYPE_DATE:
581 return "date";
582 case MYSQL_TYPE_TIME:
583 return "time";
585 return "datetime";
586 case MYSQL_TYPE_YEAR:
587 return "year";
589 return "date";
591 return "varchar";
592 case MYSQL_TYPE_BIT:
593 return "bit";
595 return "timestamp";
597 return "datetime";
598 case MYSQL_TYPE_TIME2:
599 return "time";
601 return "typed_array";
603 return "vector";
605 return "invalid";
606 case MYSQL_TYPE_BOOL:
607 return "bool";
608 case MYSQL_TYPE_JSON:
609 return "json";
611 return "decimal";
612 case MYSQL_TYPE_ENUM:
613 return "enum";
614 case MYSQL_TYPE_SET:
615 return "set";
617 return "tiny_blob";
619 return "medium_blob";
621 return "long_blob";
622 case MYSQL_TYPE_BLOB:
623 return "blob";
625 return "var_string";
627 return "string";
629 return "geometry";
630 }
631 return "invalid";
632}
633
635 switch (m_type) {
636 case MYSQL_TYPE_JSON:
641 case MYSQL_TYPE_BLOB:
644 return true;
645 }
646 default:
647 break;
648 }
649 return false;
650}
651
652inline std::string Column_meta::to_string() const {
654 out << "[Column_meta: m_type=" << get_type_string()
655 << ", m_field_name=" << m_field_name << ", m_index=" << m_index
656 << ", m_field_index=" << m_field_index
657 << ", m_is_single_byte_len=" << m_is_single_byte_len
658 << ", m_is_fixed_len=" << m_is_fixed_len
659 << ", m_fixed_len=" << m_fixed_len << ", m_null_byte=" << m_null_byte
660 << ", m_null_bit=" << m_null_bit << ", m_compare=" << get_compare_string()
661 << ", m_is_desc_key=" << m_is_desc_key << ", m_is_key=" << m_is_key
662 << ", m_is_prefix_key=" << m_is_prefix_key << "]";
663 return out.str();
664}
665
666inline std::ostream &Column_meta::print(std::ostream &out) const {
667 out << to_string();
668 return out;
669}
670
671/** Overloading the global output operator to print objects of type
672Column_meta.
673@param[in] out output stream
674@param[in] obj object to be printed
675@return given output stream. */
676inline std::ostream &operator<<(std::ostream &out, const Column_meta &obj) {
677 return obj.print(out);
678}
679
680/** Table metadata. */
682 /** Number of keys/indexes the table has. */
683 size_t m_n_keys;
684
685 /** Key number of the primary key. */
687
688 /** True if generated DB_ROW_ID is the pk. */
689 bool dbrowid_is_pk{false};
690
691 /** Table being bulk loaded. */
692 std::string m_table_name;
693};
694
695/** Row metadata */
696struct Row_meta {
697 /** Key type for fast comparison. */
698 enum class Key_type {
699 /* All keys are signed integers and ascending. */
701 /* All keys are integer. */
702 INT,
703 /* Keys are of any supported type. */
704 ANY
705 };
706 /** All columns in a row are arranged with key columns first. */
707 std::vector<Column_meta> m_columns;
708
709 /** All columns in a row arranged as per col_index. */
710 std::vector<const Column_meta *> m_columns_text_order;
711
712 /** Get a string representation of this Row_meta object.
713 @see Row_meta
714 @return string representation of this object. */
715 std::string to_string() const;
716
717 /** Get the metadata of the given column.
718 @param[in] col_index position of the column in the index.
719 @return metadata of the requested column. */
720 const Column_meta &get_column_meta_index_order(size_t col_index) const {
721 assert(col_index < m_columns.size());
722 return m_columns[col_index];
723 }
724
725 /** Get the meta data of the column.
726 @param[in] col_index the index of the column as it appears in CSV file.
727 @return a reference to the column meta data.*/
728 const Column_meta &get_column_meta(size_t col_index) const {
729 assert(col_index < m_columns_text_order.size());
730 assert(col_index == m_columns_text_order[col_index]->m_index);
731 return *m_columns_text_order[col_index];
732 }
733
734 /** Total bitmap header length for the row. */
735 size_t m_bitmap_length = 0;
736
737 /** Total header length. */
738 size_t m_header_length = 0;
739
740 /** Length of the first key column. Helps to get the row pointer from first
741 key data pointer. */
742 size_t m_first_key_len = 0;
743
744 /** Key length in bytes for non-integer keys. This is required to estimate
745 the space required to save keys. */
746 size_t m_key_length = 0;
747
748 /** Number of columns used in primary key. */
749 uint32_t m_keys = 0;
750
751 /** Number of columns not used in primary Key. */
752 uint32_t m_non_keys = 0;
753
754 /** Key type for comparison. */
756
757 /** Total number of columns. A key could be on a column prefix.
758 m_num_columns <= m_keys + m_non_keys */
759 uint32_t m_num_columns = 0;
760
761 /** Approximate row length. */
763
764 /** Number of columns that can be stored externally. */
765 size_t m_n_blob_cols{0};
766
767 /** Name of the key */
768 std::string m_name;
769
770 /** true if primary key, false if secondary key. */
771 bool is_pk{false};
772
773 /** true if DB_ROW_ID is the pk, false otherwise. */
774 bool dbrowid_is_pk{false};
775};
776
777inline std::ostream &operator<<(std::ostream &os,
779 switch (key_type) {
781 os << "ANY";
782 break;
784 os << "INT_SIGNED_ASC";
785 break;
787 os << "INT";
788 break;
789 }
790 return os;
791}
792
793inline std::string Row_meta::to_string() const {
795 out << "[Row_meta: m_name=" << m_name << ", m_num_columns=" << m_num_columns
796 << ", m_keys=" << m_keys << ", m_non_keys=" << m_non_keys
797 << ", m_key_length=" << m_key_length << ", m_key_type=" << m_key_type
798 << ", m_approx_row_len=" << m_approx_row_len;
799 for (auto &col_meta : m_columns) {
800 out << col_meta.to_string() << ", ";
801 }
802 out << "]";
803 return out.str();
804}
805
806inline char *Column_mysql::get_row_begin(const Row_meta &row_meta,
807 size_t col_index
808 [[maybe_unused]]) const {
809 assert(m_is_null || col_index == 0);
810 return m_is_null ? m_data_ptr
811 : (m_data_ptr - row_meta.m_first_key_len -
812 row_meta.m_header_length);
813}
814
815namespace Bulk_load {
816
819 public:
820 void KeyTooBig() const override;
821 void ValueTooBig() const override;
822 void TooDeep() const override;
823 void InvalidJson() const override;
824 void InternalError(const char *message) const override;
825 bool CheckStack() const override;
826
827 const char *c_str() const { return m_error.c_str(); }
828
829 std::string get_error() const { return m_error; }
830
831 private:
832 mutable std::string m_error{};
833};
834
836 m_error = "Key is too big";
837}
838
840 m_error = "Value is too big";
841}
842
844 m_error = "JSON document has more nesting levels than supported";
845}
847 m_error = "Invalid JSON value is encountered";
848}
850 const char *message [[maybe_unused]]) const {
851 m_error = message;
852 m_error += " (Internal Error)";
853}
854
856 return false;
857}
858
859/** Callbacks for collecting time statistics */
861 /* Operation begin. */
862 std::function<void()> m_fn_begin;
863 /* Operation end. */
864 std::function<void()> m_fn_end;
865};
866
867} // namespace Bulk_load
868
869/** Bulk Data conversion. */
870BEGIN_SERVICE_DEFINITION(bulk_data_convert)
871/** Convert row from text format for MySQL column format. Convert as many
872rows as possible consuming the data buffer starting from next_index. On
873output next_index is the next row index that is not yet consumed. If it
874matches the size of input text_rows, then all rows are consumed.
875@param[in,out] thd session THD
876@param[in] table MySQL TABLE
877@param[in] text_rows rows with column in text
878@param[in,out] next_index next_index in text_rows to be processed
879@param[in,out] buffer data buffer for keeping sql row data
880@param[in,out] buffer_length length of the data buffer
881@param[in] charset input row data character set
882@param[in] metadata row metadata
883@param[out] sql_rows rows with column in MySQL column format
884@return error code. */
886 (THD * thd, const TABLE *table, const Rows_text &text_rows,
887 size_t &next_index, char *buffer, size_t &buffer_length,
888 const CHARSET_INFO *charset, const Row_meta &metadata,
889 Rows_mysql &sql_rows,
891
892/** Convert row to MySQL column format from raw form
893@param[in,out] buffer input raw data buffer
894@param[in] buffer_length buffer length
895@param[in] metadata row metadata
896@param[in] start_index start row index in row bunch
897@param[out] consumed_length length of buffer consumed
898@param[in,out] sql_rows row bunch to fill data
899@return error code. */
901 (char *buffer, size_t buffer_length, const Row_meta &metadata,
902 size_t start_index, size_t &consumed_length,
903 Rows_mysql &sql_rows));
904
905/** Convert row to MySQL column format using the key
906@param[in] metadata row metadata
907@param[in] sql_keys Key bunch
908@param[in] key_offset offset for the key
909@param[in,out] sql_rows row bunch to fill data
910@param[in] sql_index index of the row to be filled
911@return error code. */
913 (const Row_meta &metadata, const Rows_mysql &sql_keys,
914 size_t key_offset, Rows_mysql &sql_rows, size_t sql_index));
915
916/** Check if session is interrupted.
917@param[in,out] thd session THD
918@return true if connection or statement is killed. */
920
921/** Compare two key columns
922@param[in] key1 first key
923@param[in] key2 second key
924@param[in] col_meta column meta information
925@return positive, 0, negative, if key_1 is greater, equal, less than key_2 */
927 (const Column_mysql &key1, const Column_mysql &key2,
928 const Column_meta &col_meta));
929
930/** Get row metadata information for all the indexes.
931@param[in,out] thd session THD
932@param[in] table MySQL TABLE
933@param[in] have_key include Primary Key metadata
934@param[out] metadata Metadata for each of the indexes.
935@return true if successful. */
937 (THD * thd, const TABLE *table, bool have_key,
938 std::vector<Row_meta> &metadata));
939
940/** Get table metadata information for the table being bulk loaded.
941@param[in,out] thd session THD
942@param[in] table MySQL TABLE
943@param[out] metadata Metadata of the table.
944@return true if successful. */
946 (THD * thd, const TABLE *table, Table_meta &metadata));
947
948END_SERVICE_DEFINITION(bulk_data_convert)
949
951/** Bulk data load to SE. */
953/** Begin Loading bulk data to SE.
954@param[in,out] thd session THD
955@param[in] table MySQL TABLE
956@param[in] keynr key number, identifying the index being loaded.
957@param[in] data_size total data size to load
958@param[in] memory SE memory to be used
959@param[in] num_threads Number of concurrent threads
960@return SE bulk load context or nullptr in case of an error. */
961DECLARE_METHOD(void *, begin,
962 (THD * thd, const TABLE *table, size_t keynr, size_t data_size,
963 size_t memory, size_t num_threads));
964
965/** Load a set of rows to SE table by one thread.
966@param[in,out] thd session THD
967@param[in,out] ctx SE load context returned by begin()
968@param[in] table MySQL TABLE
969@param[in] sql_rows row data to load
970@param[in] thread current thread number
971@param[in] wait_cbks wait stat callbacks
972@return true if successful. */
973DECLARE_METHOD(bool, load,
974 (THD * thd, void *ctx, const TABLE *table,
975 const Rows_mysql &sql_rows, size_t thread,
976 Bulk_load::Stat_callbacks &wait_cbks));
977
978/** Create a blob context object to insert a blob.
979@param[in,out] thd session THD
980@param[in,out] load_ctx SE load context returned by begin()
981@param[in] table MySQL TABLE
982@param[out] blob_ctx a blob context object to insert a blob.
983@param[out] blobref buffer to hold blob reference
984@param[in] thread current thread number
985@return true if successful. */
987 (THD * thd, void *load_ctx, const TABLE *table,
988 Blob_context &blob_ctx, unsigned char *blobref, size_t thread));
989
990/** Write data into a blob
991@param[in,out] thd session THD
992@param[in,out] load_ctx SE load context returned by begin()
993@param[in] table MySQL TABLE
994@param[in] blob_ctx a blob context object to insert a blob.
995@param[out] blobref buffer to hold blob reference
996@param[in] thread current thread number
997@param[in] data blob data to be written
998@param[in] data_len length of blob data to be written (in bytes)
999@return true if successful. */
1001 (THD * thd, void *load_ctx, const TABLE *table,
1002 Blob_context blob_ctx, unsigned char *blobref, size_t thread,
1003 const unsigned char *data, size_t data_len));
1004
1005/** Close the blob
1006@param[in,out] thd session THD
1007@param[in,out] load_ctx SE load context returned by begin()
1008@param[in] table MySQL TABLE
1009@param[in] blob_ctx a blob context object to insert a blob.
1010@param[out] blobref buffer to hold blob reference
1011@param[in] thread current thread number
1012@return true if successful. */
1014 (THD * thd, void *load_ctx, const TABLE *table,
1015 Blob_context blob_ctx, unsigned char *blobref, size_t thread));
1016
1017/** End Loading bulk data to SE.
1018
1019Called at the end of bulk load execution, even if begin or load calls failed.
1020
1021@param[in,out] thd session THD
1022@param[in,out] ctx SE load context
1023@param[in] table MySQL TABLE
1024@param[in] error true, if exiting after error
1025@return true if successful. */
1026DECLARE_METHOD(bool, end,
1027 (THD * thd, void *ctx, const TABLE *table, bool error));
1028
1029/** Check if a table is supported by the bulk load implementation.
1030@param[in,out] thd session THD
1031@param[in] table MySQL TABLE
1032@return true if table is supported. */
1034
1035/** Get available buffer pool memory for bulk load operations.
1036@param[in,out] thd session THD
1037@param[in] table MySQL TABLE
1038@return buffer pool memory available for bulk load. */
1040
Kerberos Client Authentication nullptr
Definition: auth_kerberos_client_plugin.cc:247
constexpr size_t BLOB_REF_SIZE
The blob reference size.
Definition: bulk_data_service.h:53
void * Blob_context
Definition: bulk_data_service.h:50
std::ostream & operator<<(std::ostream &out, const Bulk_load_error_location_details &obj)
Overloading the global output operator to print objects of type Bulk_load_error_location_details.
Definition: bulk_data_service.h:81
Definition: bulk_data_service.h:818
const char * c_str() const
Definition: bulk_data_service.h:827
void KeyTooBig() const override
Called when a JSON object contains a member with a name that is longer than supported by the JSON bin...
Definition: bulk_data_service.h:835
std::string get_error() const
Definition: bulk_data_service.h:829
std::string m_error
Definition: bulk_data_service.h:832
void InternalError(const char *message) const override
Called when an internal error occurs.
Definition: bulk_data_service.h:849
void ValueTooBig() const override
Called when a JSON document is too big to be stored in the JSON binary format.
Definition: bulk_data_service.h:839
void TooDeep() const override
Called when a JSON document has more nesting levels than supported.
Definition: bulk_data_service.h:843
void InvalidJson() const override
Called when an invalid JSON value is encountered.
Definition: bulk_data_service.h:846
bool CheckStack() const override
Check if the stack is about to be exhausted, and report the error.
Definition: bulk_data_service.h:855
Error handler for the functions that serialize a JSON value in the JSON binary storage format.
Definition: json_error_handler.h:49
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:302
bool set_num_rows(size_t n_rows)
Set the number of rows.
Definition: bulk_data_service.h:398
std::vector< Column_type > m_columns
All the columns.
Definition: bulk_data_service.h:429
size_t get_next_row_offset(size_t &offset) const
Get next row offset from current row offset.
Definition: bulk_data_service.h:356
Column_type & get_col(size_t col_offset)
Get column using the column offset.
Definition: bulk_data_service.h:382
bool process_columns(size_t row_index, F &&cbk)
Process all columns, invoking callback for each.
Definition: bulk_data_service.h:319
bool process_columns_by_offset(size_t row_offset, F &&cbk)
Definition: bulk_data_service.h:327
size_t get_num_cols() const
Definition: bulk_data_service.h:312
void reset()
Definition: bulk_data_service.h:339
size_t m_num_rows
Number of rows.
Definition: bulk_data_service.h:432
size_t get_row_offset(size_t row_index) const
Get current row offset to access columns.
Definition: bulk_data_service.h:348
size_t get_num_rows() const
Definition: bulk_data_service.h:309
const Column_type & read_column(size_t row_offset, size_t col_index) const
Get constant column for reading using row offset and column index.
Definition: bulk_data_service.h:388
Column_type & get_col(size_t row_index, size_t col_index)
Get column using row index and column index.
Definition: bulk_data_service.h:375
Row_bunch(size_t n_cols)
Create a new row bunch.
Definition: bulk_data_service.h:306
static const size_t S_MAX_TOTAL_COLS
Limit allocation up to 600M columns.
Definition: bulk_data_service.h:425
size_t m_num_columns
Number of columns in each row.
Definition: bulk_data_service.h:435
Column_type & get_column(size_t row_offset, size_t col_index)
Get column using row offset and column index.
Definition: bulk_data_service.h:365
For each client connection we create a separate thread with THD serving as a thread/connection descriptor.
Definition: sql_lexer_thd.h:36
This file contains the field type.
enum_field_types
Column types for MySQL Note: Keep include/mysql/components/services/bits/stored_program_bits....
Definition: field_types.h:55
@ MYSQL_TYPE_BOOL
Currently just a placeholder.
Definition: field_types.h:79
@ MYSQL_TYPE_TIME2
Internal to MySQL.
Definition: field_types.h:75
@ MYSQL_TYPE_VARCHAR
Definition: field_types.h:71
@ MYSQL_TYPE_LONGLONG
Definition: field_types.h:64
@ MYSQL_TYPE_LONG_BLOB
Definition: field_types.h:86
@ MYSQL_TYPE_VAR_STRING
Definition: field_types.h:88
@ MYSQL_TYPE_BLOB
Definition: field_types.h:87
@ MYSQL_TYPE_TINY
Definition: field_types.h:57
@ MYSQL_TYPE_TIME
Definition: field_types.h:67
@ MYSQL_TYPE_SET
Definition: field_types.h:83
@ MYSQL_TYPE_NEWDATE
Internal to MySQL.
Definition: field_types.h:70
@ MYSQL_TYPE_VECTOR
Definition: field_types.h:77
@ MYSQL_TYPE_JSON
Definition: field_types.h:80
@ MYSQL_TYPE_STRING
Definition: field_types.h:89
@ MYSQL_TYPE_NULL
Definition: field_types.h:62
@ MYSQL_TYPE_ENUM
Definition: field_types.h:82
@ MYSQL_TYPE_TINY_BLOB
Definition: field_types.h:84
@ MYSQL_TYPE_LONG
Definition: field_types.h:59
@ MYSQL_TYPE_BIT
Definition: field_types.h:72
@ MYSQL_TYPE_INVALID
Definition: field_types.h:78
@ MYSQL_TYPE_GEOMETRY
Definition: field_types.h:90
@ MYSQL_TYPE_NEWDECIMAL
Definition: field_types.h:81
@ MYSQL_TYPE_DECIMAL
Definition: field_types.h:56
@ MYSQL_TYPE_TYPED_ARRAY
Used for replication only.
Definition: field_types.h:76
@ MYSQL_TYPE_DOUBLE
Definition: field_types.h:61
@ MYSQL_TYPE_MEDIUM_BLOB
Definition: field_types.h:85
@ MYSQL_TYPE_DATETIME2
Internal to MySQL.
Definition: field_types.h:74
@ MYSQL_TYPE_SHORT
Definition: field_types.h:58
@ MYSQL_TYPE_DATE
Definition: field_types.h:66
@ MYSQL_TYPE_FLOAT
Definition: field_types.h:60
@ MYSQL_TYPE_TIMESTAMP
Definition: field_types.h:63
@ MYSQL_TYPE_INT24
Definition: field_types.h:65
@ MYSQL_TYPE_DATETIME
Definition: field_types.h:68
@ MYSQL_TYPE_TIMESTAMP2
Definition: field_types.h:73
@ MYSQL_TYPE_YEAR
Definition: field_types.h:69
static int compare_keys(PFS_table_share *pfs, const TABLE_SHARE *share)
Definition: pfs_instr_class.cc:2438
static uint16 key1[1001]
Definition: hp_test2.cc:50
#define F
Definition: jit_executor_value.cc:374
void error(const char *format,...)
int mysql_format_from_raw(char *buffer, size_t buffer_length, const Row_meta &metadata, size_t start_index, size_t &consumed_length, Rows_mysql &sql_rows) noexcept
Definition: bulk_data_service.cc:1857
bool get_table_metadata(THD *thd, const TABLE *table, Table_meta &table_meta) noexcept
Definition: bulk_data_service.cc:2528
int mysql_format(THD *thd, const TABLE *table, const Rows_text &text_rows, size_t &next_index, char *buffer, size_t &buffer_length, const CHARSET_INFO *charset, const Row_meta &metadata, Rows_mysql &sql_rows, Bulk_load_error_location_details &error_details) noexcept
Definition: bulk_data_service.cc:1888
bool get_row_metadata_all(THD *thd, const TABLE *table, bool have_key, std::vector< Row_meta > &row_meta_all) noexcept
Definition: bulk_data_service.cc:2547
int mysql_format_using_key(const Row_meta &metadata, const Rows_mysql &sql_keys, size_t key_offset, Rows_mysql &sql_rows, size_t sql_index) noexcept
Definition: bulk_data_service.cc:1798
bool is_killed(THD *thd) noexcept
Definition: bulk_data_service.cc:1940
bool open_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context &blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2613
size_t get_se_memory_size(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2721
bool write_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread, const unsigned char *data, size_t data_len) noexcept
Definition: bulk_data_service.cc:2623
bool close_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2631
bool is_table_supported(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2725
Definition: bulk_data_service.h:815
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
const std::string charset("charset")
bool load(THD *, const dd::String_type &fname, dd::String_type *buf)
Read an sdi file from disk and store in a buffer.
Definition: sdi_file.cc:308
std::string hex(const Container &c)
Definition: hex.h:61
bool index(const std::string &value, const String &search_for, uint32_t *idx)
Definition: contains.h:76
int key_type
Definition: method.h:38
Definition: aligned_atomic.h:44
ValueType max(X &&first)
Definition: gtid.h:103
const char * begin(const char *const c)
Definition: base64.h:44
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:418
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2876
#define DECLARE_METHOD(retval, name, args)
Declares a method as a part of the Service definition.
Definition: service.h:103
#define END_SERVICE_DEFINITION(name)
A macro to end the last Service definition started with the BEGIN_SERVICE_DEFINITION macro.
Definition: service.h:91
#define BEGIN_SERVICE_DEFINITION(name)
Declares a new Service.
Definition: service.h:86
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:860
std::function< void()> m_fn_begin
Definition: bulk_data_service.h:862
std::function< void()> m_fn_end
Definition: bulk_data_service.h:864
Definition: bulk_data_service.h:55
std::string filename
Definition: bulk_data_service.h:56
std::string m_table_name
Definition: bulk_data_service.h:62
size_t m_bytes
Definition: bulk_data_service.h:63
size_t row_number
Definition: bulk_data_service.h:57
std::string column_input_data
Definition: bulk_data_service.h:60
std::string column_name
Definition: bulk_data_service.h:58
size_t m_column_length
Definition: bulk_data_service.h:64
std::string m_error_mesg
Definition: bulk_data_service.h:61
std::ostream & print(std::ostream &out) const
Definition: bulk_data_service.h:69
std::string column_type
Definition: bulk_data_service.h:59
Definition: m_ctype.h:421
Column metadata information.
Definition: bulk_data_service.h:442
bool m_is_prefix_key
If the key is a prefix of the column.
Definition: bulk_data_service.h:500
enum_field_types m_type
Field type.
Definition: bulk_data_service.h:485
std::string m_field_name
Field name.
Definition: bulk_data_service.h:540
std::string get_type_string() const
Get the data type of the column as a string.
Definition: bulk_data_service.h:558
uint16_t m_index
Index of column in row.
Definition: bulk_data_service.h:525
bool m_is_single_byte_len
If character column length can be kept in one byte.
Definition: bulk_data_service.h:516
uint16_t m_null_byte
Byte index in NULL bitmap.
Definition: bulk_data_service.h:531
bool m_is_desc_key
If the key is descending.
Definition: bulk_data_service.h:497
Compare m_compare
Data comparison method used for this column (see Compare).
Definition: bulk_data_service.h:506
bool m_is_pk
true if column belongs to primary index (key or non-key)
Definition: bulk_data_service.h:491
uint16_t m_fixed_len
The length of column data if fixed.
Definition: bulk_data_service.h:519
std::string to_string() const
Get a string representation of Column_meta object.
Definition: bulk_data_service.h:652
bool is_integer() const
Definition: bulk_data_service.h:471
Compare
Data comparison method.
Definition: bulk_data_service.h:444
uint16_t m_field_index
Position of column in table.
Definition: bulk_data_service.h:528
uint16_t m_max_len
Maximum length of data in bytes.
Definition: bulk_data_service.h:522
bool m_is_fixed_len
If it is fixed length type.
Definition: bulk_data_service.h:503
bool m_is_key
true if column is a key for primary or secondary index.
Definition: bulk_data_service.h:494
uint16_t m_null_bit
BIT number in NULL bitmap.
Definition: bulk_data_service.h:534
bool can_be_stored_externally() const
Based on the column data type check if it can be stored externally.
Definition: bulk_data_service.h:634
bool m_fixed_len_if_set_in_row
Check the row header to find out if it is fixed length.
Definition: bulk_data_service.h:513
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:666
bool m_is_nullable
If column could be NULL.
Definition: bulk_data_service.h:488
bool m_is_part_of_sk
true if this column is part of secondary index.
Definition: bulk_data_service.h:482
bool m_is_unsigned
If it is unsigned integer type.
Definition: bulk_data_service.h:509
std::string get_compare_string() const
Definition: bulk_data_service.h:455
const void * m_charset
Character set for char & varchar columns.
Definition: bulk_data_service.h:537
Definition: bulk_data_service.h:205
uint64_t m_int_data
Column data in integer format.
Definition: bulk_data_service.h:238
std::string to_string() const
Definition: bulk_data_service.h:261
bool m_is_null
If column is NULL.
Definition: bulk_data_service.h:213
char * get_row_begin(const Row_meta &row_meta, size_t col_index) const
Get the pointer to the beginning of row.
Definition: bulk_data_service.h:806
char * m_data_ptr
Column data or row begin.
Definition: bulk_data_service.h:258
char * get_data() const
Definition: bulk_data_service.h:215
int16_t m_type
Column Data Type.
Definition: bulk_data_service.h:207
uint16_t m_data_len
Column data length.
Definition: bulk_data_service.h:210
void set_data(char *ptr)
Definition: bulk_data_service.h:217
void init()
Definition: bulk_data_service.h:240
void row(char *row_begin)
Save the beginning of the row pointer in this object.
Definition: bulk_data_service.h:222
Definition: bulk_data_service.h:86
bool is_null() const
Check if the column is null, by checking special value for length.
Definition: bulk_data_service.h:110
bool is_ext_relaxed() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:130
bool m_is_ext
If true, the column data is stored externally.
Definition: bulk_data_service.h:158
void set_null()
Mark the column to be null, by setting length to a special value.
Definition: bulk_data_service.h:103
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:187
const char * m_data_ptr
Column data.
Definition: bulk_data_service.h:88
bool is_row_id() const
Check if it is DB_ROW_ID column based on the value it contains.
Definition: bulk_data_service.h:95
uint64_t m_row_id
The generated DB_ROW_ID value.
Definition: bulk_data_service.h:98
void init()
Initialize the members.
Definition: bulk_data_service.h:142
void set_ext()
Mark that the column data has been stored externally.
Definition: bulk_data_service.h:136
bool is_ext() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:121
size_t m_data_len
Column data length.
Definition: bulk_data_service.h:91
std::string to_string() const
Definition: bulk_data_service.h:161
Definition: mysql.h:300
Row metadata.
Definition: bulk_data_service.h:696
const Column_meta & get_column_meta_index_order(size_t col_index) const
Get the metadata of the given column.
Definition: bulk_data_service.h:720
const Column_meta & get_column_meta(size_t col_index) const
Get the meta data of the column.
Definition: bulk_data_service.h:728
size_t m_n_blob_cols
Number of columns that can be stored externally.
Definition: bulk_data_service.h:765
size_t m_bitmap_length
Total bitmap header length for the row.
Definition: bulk_data_service.h:735
std::string to_string() const
Get a string representation of this Row_meta object.
Definition: bulk_data_service.h:793
bool dbrowid_is_pk
true if DB_ROW_ID is the pk, false otherwise.
Definition: bulk_data_service.h:774
size_t m_first_key_len
Length of the first key column.
Definition: bulk_data_service.h:742
size_t m_header_length
Total header length.
Definition: bulk_data_service.h:738
Key_type
Key type for fast comparison.
Definition: bulk_data_service.h:698
uint32_t m_non_keys
Number of columns not used in primary Key.
Definition: bulk_data_service.h:752
uint32_t m_num_columns
Total number of columns.
Definition: bulk_data_service.h:759
uint32_t m_keys
Number of columns used in primary key.
Definition: bulk_data_service.h:749
size_t m_key_length
Key length in bytes for non-integer keys.
Definition: bulk_data_service.h:746
std::string m_name
Name of the key.
Definition: bulk_data_service.h:768
std::vector< Column_meta > m_columns
All columns in a row are arranged with key columns first.
Definition: bulk_data_service.h:707
std::vector< const Column_meta * > m_columns_text_order
All columns in a row arranged as per col_index.
Definition: bulk_data_service.h:710
Key_type m_key_type
Key type for comparison.
Definition: bulk_data_service.h:755
size_t m_approx_row_len
Approximate row length.
Definition: bulk_data_service.h:762
bool is_pk
true if primary key, false if secondary key.
Definition: bulk_data_service.h:771
Definition: table.h:1435
Table metadata.
Definition: bulk_data_service.h:681
size_t m_keynr_pk
Key number of the primary key.
Definition: bulk_data_service.h:686
bool dbrowid_is_pk
True if generated DB_ROW_ID is the pk.
Definition: bulk_data_service.h:689
std::string m_table_name
Table being bulk loaded.
Definition: bulk_data_service.h:692
size_t m_n_keys
Number of keys/indexes the table has.
Definition: bulk_data_service.h:683