MySQL 9.3.0
Source Code Documentation
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages Concepts
bulk_data_service.h
Go to the documentation of this file.
1/* Copyright (c) 2022, 2025, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/**
25 @file
26 Services for bulk data conversion and load to SE.
27*/
28
29#pragma once
30
31#include <assert.h>
33#include <stddef.h>
34#include <cstring>
35#include <functional>
36#include <iomanip>
37#include <iostream>
38#include <limits>
39#include <memory>
40#include <sstream>
41#include <string>
42#include <vector>
43#include "field_types.h"
45
46class THD;
47struct TABLE;
48struct CHARSET_INFO;
49using Blob_context = void *;
50
51/** The blob reference size. Refer to lob::ref_t::SIZE or FIELD_REF_SIZE. */
52constexpr size_t BLOB_REF_SIZE = 20;
53
55 std::string filename;
56 size_t row_number;
57 std::string column_name;
58 std::string column_type;
59 std::string column_input_data;
60 std::string m_error_mesg{};
61 std::string m_table_name{};
62 size_t m_bytes;
64
65 std::ostream &print(std::ostream &out) const;
66};
67
69 std::ostream &out) const {
70 out << "[Bulk_load_error_location_details: filename=" << filename
71 << ", column_name=" << column_name << "]";
72 return out;
73}
74
75/** Overloading the global output operator to print objects of type
76Bulk_load_error_location_details.
77@param[in] out output stream
78@param[in] obj object to be printed
79@return given output stream. */
80inline std::ostream &operator<<(std::ostream &out,
82 return obj.print(out);
83}
84
86 /** Column data. */
87 const char *m_data_ptr{};
88
89 /** Column data length. */
90 size_t m_data_len{};
91
92 /** Mark the column to be null, by setting length to a special value. This is
93 only used for columns whose state is maintained across chunks
94 (aka fragmented columns). */
95 void set_null() {
96 assert(m_data_ptr == nullptr);
98 }
99
100 /** Check if the column is null, by checking special value for length.
101 @return true if the column is null, false otherwise. */
102 bool is_null() const {
104 m_data_ptr == nullptr);
106 }
107
108 /** Check if the column data is stored externally. If the data is stored
109 externally, then the data length (m_data_len) would be equal to the
110 BLOB_REF_SIZE and the column data (m_data_ptr) will contain the lob
111 reference.
112 @return true if data is stored externally, false otherwise. */
113 bool is_ext() const {
114 assert(!m_is_ext || m_data_len == BLOB_REF_SIZE);
115 return m_is_ext;
116 }
117
118 /** Check if the column data is stored externally. It is called relaxed,
119 because the column length might not be equal to BLOB_REF_SIZE. Only to
120 be used while the blob is being processed by the CSV parser.
121 @return true if data is stored externally, false otherwise. */
122 bool is_ext_relaxed() const {
123 assert(!m_is_ext || m_data_len >= BLOB_REF_SIZE);
124 return m_is_ext;
125 }
126
127 /** Mark that the column data has been stored externally. */
128 void set_ext() {
129 assert(m_data_len == BLOB_REF_SIZE);
130 m_is_ext = true;
131 }
132
133 /** Initialize the members */
134 void init() {
135 m_data_ptr = nullptr;
136 m_data_len = 0;
137 m_is_ext = false;
138 }
139
140 /** Print this object into the given output stream.
141 @param[in] out output stream into which this object will be printed.
142 @return given output stream */
143 std::ostream &print(std::ostream &out) const;
144
145 std::string to_string() const;
146
147 private:
148 /** If true, the column data is stored externally. */
149 bool m_is_ext{false};
150};
151
152inline std::string Column_text::to_string() const {
154 sout << "[Column_text: len=" << m_data_len;
155 sout << ", val=";
156
157 if (m_data_ptr == nullptr) {
158 sout << "nullptr";
159 } else {
160 for (size_t i = 0; i < m_data_len; ++i) {
161 const char c = m_data_ptr[i];
162 if (isalnum(c)) {
163 sout << c;
164 } else {
165 sout << ".";
166 }
167 }
168 sout << "[hex=";
169 for (size_t i = 0; i < m_data_len; ++i) {
170 sout << std::setfill('0') << std::setw(2) << std::hex
171 << (int)*(&m_data_ptr[i]);
172 }
173 }
174 sout << "]";
175 return sout.str();
176}
177
178inline std::ostream &Column_text::print(std::ostream &out) const {
179 out << "[Column_text: this=" << static_cast<const void *>(this)
180 << ", m_data_ptr=" << static_cast<const void *>(m_data_ptr)
181 << ", m_data_len=" << m_data_len << ", m_is_ext=" << m_is_ext << "]";
182 return out;
183}
184
185/** Overloading the global output operator to print objects of type
186Column_text.
187@param[in] out output stream
188@param[in] obj object to be printed
189@return given output stream. */
190inline std::ostream &operator<<(std::ostream &out, const Column_text &obj) {
191 return obj.print(out);
192}
193
194struct Row_meta;
195
197 /** Column Data Type */
198 int16_t m_type{};
199
200 /** Column data length. */
201 uint16_t m_data_len{};
202
203 /** If column is NULL. */
204 bool m_is_null{false};
205
206 char *get_data() const { return m_is_null ? nullptr : m_data_ptr; }
207
208 void set_data(char *ptr) { m_data_ptr = ptr; }
209
210 /** Save the beginning of the row pointer in this object. This should be
211 called only when the column is null.
212 @param[in] row_begin pointer to beginning of row.*/
213 void row(char *row_begin) {
214 assert(m_is_null);
215 m_data_len = 0;
216 m_data_ptr = row_begin;
217 }
218
219 /** Get the pointer to the beginning of row. This is valid only if the
220 column is null. This should be called on the first column of the row. There
221 is no need to call this on other columns.
222 @param[in] row_meta meta data information about the row
223 @param[in] col_index Index of the first column which is 0.
224 @return pointer to row beginning. */
225 char *get_row_begin(const Row_meta &row_meta,
226 size_t col_index [[maybe_unused]]) const;
227
228 /** Column data in integer format. Used only for specific datatype. */
229 uint64_t m_int_data;
230
231 void init() {
232 m_type = 0;
233 m_data_len = 0;
234 m_is_null = false;
235 m_data_ptr = nullptr;
236 m_int_data = 0;
237 }
238
239 std::string to_string() const;
240
241 private:
242 /** Column data or row begin. There is a need to fetch the beginning of
243 the row from the vector of Column_mysql. But in the case of secondary
244 indexes, all the keys could be null and it becomes impossible to obtain
245 the pointer to beginning of the row. To solve this problem, I am re-using
246 this pointer to hold the row begin when the column is null. So it becomes
247 important to make use of m_is_null to check if the column is null. It is NOT
248 correct to check this pointer against nullptr to confirm if column is null.*/
249 char *m_data_ptr{nullptr};
250};
251
252inline std::string Column_mysql::to_string() const {
254 sout << "[Column_mysql: len=" << m_data_len;
255 sout << ", val=";
256
257 switch (m_type) {
258 case MYSQL_TYPE_LONG: {
259 sout << m_int_data;
260 } break;
261 default: {
262 for (size_t i = 0; i < m_data_len; ++i) {
263 const char c = m_data_ptr[i];
264 if (isalnum(c)) {
265 sout << c;
266 } else {
267 sout << ".";
268 }
269 }
270
271 } break;
272 }
273 if (m_type != MYSQL_TYPE_LONG) {
274 sout << "[hex=";
275 for (size_t i = 0; i < m_data_len; ++i) {
276 sout << std::setfill('0') << std::setw(2) << std::hex
277 << (int)*(&m_data_ptr[i]);
278 }
279 sout << "]";
280 }
281 return sout.str();
282}
283
284/** Implements the row and column memory management for parse and load
285operations. We try to pre-allocate the memory contiguously as much as we can
286to maximize the performance.
287
288@tparam Column_type Column_text when used in the CSV context, Column_mysql
289when used in the InnoDB context.
290*/
291template <typename Column_type>
293 public:
294 /** Create a new row bunch.
295 @param[in] n_cols number of columns */
296 Row_bunch(size_t n_cols) : m_num_columns(n_cols) {}
297
298 /** @return return number of rows in the bunch. */
299 size_t get_num_rows() const { return m_num_rows; }
300
301 /** @return return number of columns in each row. */
302 size_t get_num_cols() const { return m_num_columns; }
303
304 /** Process all columns, invoking callback for each.
305 @param[in] row_index index of the row
306 @param[in] cbk callback function
307 @return true if successful */
308 template <typename F>
309 bool process_columns(size_t row_index, F &&cbk) {
310 assert(row_index < m_num_rows);
311
312 auto row_offset = row_index * m_num_columns;
313 return process_columns_by_offset(row_offset, std::move(cbk));
314 }
315
316 template <typename F>
317 bool process_columns_by_offset(size_t row_offset, F &&cbk) {
318 assert(row_offset + m_num_columns <= m_columns.size());
319
320 for (size_t index = 0; index < m_num_columns; ++index) {
321 bool last_col = (index == m_num_columns - 1);
322 if (!cbk(m_columns[row_offset + index], last_col)) {
323 return false;
324 }
325 }
326 return true;
327 }
328
329 void reset() {
330 for (auto &col : m_columns) {
331 col.init();
332 }
333 }
334
335 /** Get current row offset to access columns.
336 @param[in] row_index row index
337 @return row offset in column vector. */
338 size_t get_row_offset(size_t row_index) const {
339 assert(row_index < m_num_rows);
340 return row_index * m_num_columns;
341 }
342
343 /** Get next row offset from current row offset.
344 @param[in,out] offset row offset
345 @return true if there is a next row. */
346 size_t get_next_row_offset(size_t &offset) const {
347 offset += m_num_columns;
348 return (offset < m_columns.size());
349 }
350
351 /** Get column using row offset and column index.
352 @param[in] row_offset row offset in column vector
353 @param[in] col_index index of the column within row
354 @return column data */
355 Column_type &get_column(size_t row_offset, size_t col_index) {
356 assert(col_index < m_num_columns);
357 assert(row_offset + col_index < m_columns.size());
358 return m_columns[row_offset + col_index];
359 }
360
361 /** Get column using row index and column index.
362 @param[in] row_index index of the row in the bunch
363 @param[in] col_index index of the column within row
364 @return column data */
365 Column_type &get_col(size_t row_index, size_t col_index) {
366 return get_column(get_row_offset(row_index), col_index);
367 }
368
369 /** Get column using the column offset.
370 @param[in] col_offset column offset
371 @return column data */
372 Column_type &get_col(size_t col_offset) { return m_columns[col_offset]; }
373
374 /** Get constant column for reading using row offset and column index.
375 @param[in] row_offset row offset in column vector
376 @param[in] col_index index of the column within row
377 @return column data */
378 const Column_type &read_column(size_t row_offset, size_t col_index) const {
379 assert(col_index < m_num_columns);
380 assert(row_offset + col_index < m_columns.size());
381 return m_columns[row_offset + col_index];
382 }
383
384 /** Set the number of rows. The request is rejected if it would exceed the
385 maximum column storage limit.
386 @param[in] n_rows number of rows
387 @return true if successful, false if too many rows or columns. */
388 bool set_num_rows(size_t n_rows) {
389 /* Avoid any overflow during multiplication. */
390 if (n_rows > std::numeric_limits<uint32_t>::max() ||
392 return false;
393 }
394 auto total_cols = (uint64_t)n_rows * m_num_columns;
395
396 if (total_cols > S_MAX_TOTAL_COLS) {
397 return false;
398 }
399
400 m_num_rows = n_rows;
401
402 /* Extend columns if needed. */
403 if (m_columns.size() < total_cols) {
404 m_columns.resize(total_cols);
405 }
406 return true;
407 }
408
409 /** Limit allocation up to 600M columns. This number is rounded up from an
410 * estimate of the number of columns with the max chunk size (1024M). In the
411 * worst case we can have 2 bytes per column so a chunk can contain around
412 * 512M columns, and because of rows that spill over chunk boundaries we
413 * assume we can append a full additional row (which should have at most
414 * 4096 columns). Rounded up to 600M. */
415 const static size_t S_MAX_TOTAL_COLS = 600 * 1024 * 1024;
416
417 private:
418 /** All the columns. */
419 std::vector<Column_type> m_columns;
420
421 /** Number of rows. */
422 size_t m_num_rows{};
423
424 /** Number of columns in each row. */
426};
427
430
431/** Column metadata information. */
433 /** Data comparison method. */
434 enum class Compare {
435 /* Integer comparison */
437 /* Unsigned Integer comparison */
439 /* Binary comparison (memcmp) */
440 BINARY,
441 /* Need to callback to use appropriate comparison function in server. */
442 MYSQL
443 };
444
445 std::string get_compare_string() const {
446 switch (m_compare) {
448 return "INTEGER_SIGNED";
450 return "INTEGER_UNSIGNED";
451 case Compare::BINARY:
452 return "BINARY";
453 case Compare::MYSQL:
454 return "MYSQL";
455 }
456 assert(0);
457 return "INVALID";
458 }
459
460 /** @return true if integer type. */
461 bool is_integer() const {
464 }
465
466 /** Based on the column data type check if it can be stored externally.
467 @return true if the column data can be stored externally
468 @return false if the column data cannot be stored externally */
469 bool can_be_stored_externally() const;
470
471 /** true if this column is part of secondary index. */
473
474 /** Field type. (@ref enum_field_types) */
476
477 /** If column could be NULL. */
479
480 /** true if column belongs to primary index (key or non-key) */
481 bool m_is_pk{false};
482
483 /** true if column is a key for primary or secondary index. */
485
486 /** If the key is descending. */
488
489 /** If the key is prefix of the column. */
491
492 /** If it is fixed length type. */
494
495 /** If it is integer type. */
497
498 /** If it is unsigned integer type. */
500
501 /** Check the row header to find out if it is fixed length. For
502 character data type the row header indicates fixed length. */
504
505 /** If character column length can be kept in one byte. */
507
508 /** The length of column data if fixed. */
509 uint16_t m_fixed_len;
510
511 /** Maximum length of data in bytes. */
512 uint16_t m_max_len;
513
514 /** Index of column in row. */
515 uint16_t m_index;
516
517 /** Position of column in table. Refer to Field::field_index() */
519
520 /** Byte index in NULL bitmap. */
521 uint16_t m_null_byte;
522
523 /** BIT number in NULL bitmap. */
524 uint16_t m_null_bit;
525
526 /** Character set for char & varchar columns. */
527 const void *m_charset;
528
529 /** Field name */
530 std::string m_field_name;
531
532 /** Get a string representation of Column_meta object. Useful only for
533 debugging purposes.
534 @see Column_meta
535 @return string representation of this object. */
536 std::string to_string() const;
537
538 /** Print this object into the given output stream.
539 @param[in] out output stream into which object will be printed
540 @return given output stream. */
541 std::ostream &print(std::ostream &out) const;
542
543 /** Get the data type of the column as a string.
544 @return data type of the column as a string. */
545 std::string get_type_string() const;
546};
547
548inline std::string Column_meta::get_type_string() const {
549 switch (m_type) {
551 return "decimal";
552 case MYSQL_TYPE_TINY:
553 return "tiny";
554 case MYSQL_TYPE_SHORT:
555 return "short";
556 case MYSQL_TYPE_LONG:
557 return "long";
558 case MYSQL_TYPE_FLOAT:
559 return "float";
561 return "double";
562 case MYSQL_TYPE_NULL:
563 return "null";
565 return "timestamp";
567 return "longlong";
568 case MYSQL_TYPE_INT24:
569 return "int";
570 case MYSQL_TYPE_DATE:
571 return "date";
572 case MYSQL_TYPE_TIME:
573 return "time";
575 return "datetime";
576 case MYSQL_TYPE_YEAR:
577 return "year";
579 return "date";
581 return "varchar";
582 case MYSQL_TYPE_BIT:
583 return "bit";
585 return "timestamp";
587 return "datetime";
588 case MYSQL_TYPE_TIME2:
589 return "time";
591 return "typed_array";
593 return "vector";
595 return "invalid";
596 case MYSQL_TYPE_BOOL:
597 return "bool";
598 case MYSQL_TYPE_JSON:
599 return "json";
601 return "decimal";
602 case MYSQL_TYPE_ENUM:
603 return "enum";
604 case MYSQL_TYPE_SET:
605 return "set";
607 return "tiny_blob";
609 return "medium_blob";
611 return "long_blob";
612 case MYSQL_TYPE_BLOB:
613 return "blob";
615 return "var_string";
617 return "string";
619 return "geometry";
620 }
621 return "invalid";
622}
623
625 switch (m_type) {
626 case MYSQL_TYPE_JSON:
631 case MYSQL_TYPE_BLOB:
634 return true;
635 }
636 default:
637 break;
638 }
639 return false;
640}
641
642inline std::string Column_meta::to_string() const {
644 out << "[Column_meta: m_type=" << get_type_string()
645 << ", m_field_name=" << m_field_name << ", m_index=" << m_index
646 << ", m_field_index=" << m_field_index
647 << ", m_is_single_byte_len=" << m_is_single_byte_len
648 << ", m_is_fixed_len=" << m_is_fixed_len
649 << ", m_fixed_len=" << m_fixed_len << ", m_null_byte=" << m_null_byte
650 << ", m_null_bit=" << m_null_bit << ", m_compare=" << get_compare_string()
651 << ", m_is_desc_key=" << m_is_desc_key << "]";
652 return out.str();
653}
654
655inline std::ostream &Column_meta::print(std::ostream &out) const {
656 out << to_string();
657 return out;
658}
659
660/** Overloading the global output operator to print objects of type
661Column_meta.
662@param[in] out output stream
663@param[in] obj object to be printed
664@return given output stream. */
665inline std::ostream &operator<<(std::ostream &out, const Column_meta &obj) {
666 return obj.print(out);
667}
668
669/** Table metadata. */
671 /** Number of keys/indexes the table has. */
672 size_t m_n_keys;
673
674 /** Key number of the primary key. */
676};
677
678/** Row metadata */
679struct Row_meta {
680 /** Key type for fast comparison. */
681 enum class Key_type {
682 /* All keys are signed integers and ascending. */
684 /* All keys are integer. */
685 INT,
686 /* Keys are of any supported type. */
687 ANY
688 };
689 /** All columns in a row are arranged with key columns first. */
690 std::vector<Column_meta> m_columns;
691
692 /** All columns in a row arranged as per col_index. */
693 std::vector<const Column_meta *> m_columns_text_order;
694
695 /** Get a string representation of this Row_meta object.
696 @see Row_meta
697 @return string representation of this object. */
698 std::string to_string() const;
699
700 /** Get the metadata of the given column.
701 @param[in] col_index position of the column in the index.
702 @return metadata of the requested column. */
703 const Column_meta &get_column_meta_index_order(size_t col_index) const {
704 assert(col_index < m_columns.size());
705 return m_columns[col_index];
706 }
707
708 /** Get the meta data of the column.
709 @param[in] col_index the index of the column as it appears in CSV file.
710 @return a reference to the column meta data.*/
711 const Column_meta &get_column_meta(size_t col_index) const {
712 assert(col_index < m_columns_text_order.size());
713 assert(col_index == m_columns_text_order[col_index]->m_index);
714 return *m_columns_text_order[col_index];
715 }
716
717 /** Total bitmap header length for the row. */
718 size_t m_bitmap_length = 0;
719
720 /** Total header length. */
721 size_t m_header_length = 0;
722
723 /** Length of the first key column. Helps to get the row pointer from first
724 key data pointer. */
725 size_t m_first_key_len = 0;
726
727 /** Key length in bytes for non-integer keys. This is required to estimate
728 the space required to save keys. */
729 size_t m_key_length = 0;
730
731 /** Number of columns used in primary key. */
732 uint32_t m_keys = 0;
733
734 /** Number of columns not used in primary Key. */
735 uint32_t m_non_keys = 0;
736
737 /** Key type for comparison. */
739
740 /** Total number of columns. A key could be on a column prefix.
741 m_num_columns <= m_keys + m_non_keys */
742 uint32_t m_num_columns = 0;
743
744 /** Approximate row length. */
746
747 /** Number of columns that can be stored externally. */
748 size_t m_n_blob_cols{0};
749
750 /** Name of the key */
751 std::string m_name;
752
753 /** true if primary key, false if secondary key. */
754 bool is_pk;
755};
756
757inline std::ostream &operator<<(std::ostream &os,
759 switch (key_type) {
761 os << "ANY";
762 break;
764 os << "INT_SIGNED_ASC";
765 break;
767 os << "INT";
768 break;
769 }
770 return os;
771}
772
773inline std::string Row_meta::to_string() const {
775 out << "[Row_meta: m_name=" << m_name << ", m_num_columns=" << m_num_columns
776 << ", m_keys=" << m_keys << ", m_non_keys=" << m_non_keys
777 << ", m_key_length=" << m_key_length << ", m_key_type=" << m_key_type
778 << ", m_approx_row_len=" << m_approx_row_len;
779 for (auto &col_meta : m_columns) {
780 out << col_meta.to_string() << ", ";
781 }
782 out << "]";
783 return out.str();
784}
785
786inline char *Column_mysql::get_row_begin(const Row_meta &row_meta,
787 size_t col_index
788 [[maybe_unused]]) const {
789 assert(m_is_null || col_index == 0);
790 return m_is_null ? m_data_ptr
791 : (m_data_ptr - row_meta.m_first_key_len -
792 row_meta.m_header_length);
793}
794
795namespace Bulk_load {
796
799 public:
800 void KeyTooBig() const override;
801 void ValueTooBig() const override;
802 void TooDeep() const override;
803 void InvalidJson() const override;
804 void InternalError(const char *message) const override;
805 bool CheckStack() const override;
806
807 const char *c_str() const { return m_error.c_str(); }
808
809 std::string get_error() const { return m_error; }
810
811 private:
812 mutable std::string m_error{};
813};
814
816 m_error = "Key is too big";
817}
818
820 m_error = "Value is too big";
821}
822
824 m_error = "JSON document has more nesting levels than supported";
825}
827 m_error = "Invalid JSON value is encountered";
828}
830 const char *message [[maybe_unused]]) const {
831 m_error = message;
832 m_error += " (Internal Error)";
833}
834
836 return false;
837}
838
839/** Callbacks for collecting time statistics */
841 /* Operation begin. */
842 std::function<void()> m_fn_begin;
843 /* Operation end. */
844 std::function<void()> m_fn_end;
845};
846
847} // namespace Bulk_load
848
849/** Bulk Data conversion. */
850BEGIN_SERVICE_DEFINITION(bulk_data_convert)
851/** Convert rows from text format to MySQL column format. Convert as many
852rows as possible consuming the data buffer starting from next_index. On
853output next_index is the next row index that is not yet consumed. If it
854matches the size of input text_rows, then all rows are consumed.
855@param[in,out] thd session THD
856@param[in] table MySQL TABLE
857@param[in] text_rows rows with column in text
858@param[in,out] next_index next_index in text_rows to be processed
859@param[in,out] buffer data buffer for keeping sql row data
860@param[in,out] buffer_length length of the data buffer
861@param[in] charset input row data character set
862@param[in] metadata row metadata
863@param[out] sql_rows rows with column in MySQL column format
864@return error code. */
866 (THD * thd, const TABLE *table, const Rows_text &text_rows,
867 size_t &next_index, char *buffer, size_t &buffer_length,
868 const CHARSET_INFO *charset, const Row_meta &metadata,
869 Rows_mysql &sql_rows,
871
872/** Convert row to MySQL column format from raw form
873@param[in,out] buffer input raw data buffer
874@param[in] buffer_length buffer length
875@param[in] metadata row metadata
876@param[in] start_index start row index in row bunch
877@param[out] consumed_length length of buffer consumed
878@param[in,out] sql_rows row bunch to fill data
879@return error code. */
881 (char *buffer, size_t buffer_length, const Row_meta &metadata,
882 size_t start_index, size_t &consumed_length,
883 Rows_mysql &sql_rows));
884
885/** Convert row to MySQL column format using the key
886@param[in] metadata row metadata
887@param[in] sql_keys Key bunch
888@param[in] key_offset offset for the key
889@param[in,out] sql_rows row bunch to fill data
890@param[in] sql_index index of the row to be filled
891@return error code. */
893 (const Row_meta &metadata, const Rows_mysql &sql_keys,
894 size_t key_offset, Rows_mysql &sql_rows, size_t sql_index));
895
896/** Check if session is interrupted.
897@param[in,out] thd session THD
898@return true if connection or statement is killed. */
900
901/** Compare two key columns
902@param[in] key1 first key
903@param[in] key2 second key
904@param[in] col_meta column meta information
905@return positive, 0, negative, if key1 is greater than, equal to, less than key2 */
907 (const Column_mysql &key1, const Column_mysql &key2,
908 const Column_meta &col_meta));
909
910/** Get row metadata information for all the indexes.
911@param[in,out] thd session THD
912@param[in] table MySQL TABLE
913@param[in] have_key include Primary Key metadata
914@param[out] metadata Metadata for each of the indexes.
915@return true if successful. */
917 (THD * thd, const TABLE *table, bool have_key,
918 std::vector<Row_meta> &metadata));
919
920/** Get table metadata information for the table being bulk loaded.
921@param[in,out] thd session THD
922@param[in] table MySQL TABLE
923@param[out] metadata Metadata of the table.
924@return true if successful. */
926 (THD * thd, const TABLE *table, Table_meta &metadata));
927
928END_SERVICE_DEFINITION(bulk_data_convert)
929
930/** Bulk data load to SE. */
933/** Begin Loading bulk data to SE.
934@param[in,out] thd session THD
935@param[in] table MySQL TABLE
936@param[in] keynr key number, identifying the index being loaded.
937@param[in] data_size total data size to load
938@param[in] memory SE memory to be used
939@param[in] num_threads Number of concurrent threads
940@return SE bulk load context or nullptr in case of an error. */
941DECLARE_METHOD(void *, begin,
942 (THD * thd, const TABLE *table, size_t keynr, size_t data_size,
943 size_t memory, size_t num_threads));
944
945/** Load a set of rows to SE table by one thread.
946@param[in,out] thd session THD
947@param[in,out] ctx SE load context returned by begin()
948@param[in] table MySQL TABLE
949@param[in] sql_rows row data to load
950@param[in] thread current thread number
951@param[in] wait_cbks wait stat callbacks
952@return true if successful. */
953DECLARE_METHOD(bool, load,
954 (THD * thd, void *ctx, const TABLE *table,
955 const Rows_mysql &sql_rows, size_t thread,
956 Bulk_load::Stat_callbacks &wait_cbks));
957
958/** Create a blob context object to insert a blob.
959@param[in,out] thd session THD
960@param[in,out] load_ctx SE load context returned by begin()
961@param[in] table MySQL TABLE
962@param[out] blob_ctx a blob context object to insert a blob.
963@param[out] blobref buffer to hold blob reference
964@param[in] thread current thread number
965@return true if successful. */
967 (THD * thd, void *load_ctx, const TABLE *table,
968 Blob_context &blob_ctx, unsigned char *blobref, size_t thread));
969
970/** Write data into a blob
971@param[in,out] thd session THD
972@param[in,out] load_ctx SE load context returned by begin()
973@param[in] table MySQL TABLE
974@param[in] blob_ctx a blob context object to insert a blob.
975@param[out] blobref buffer to hold blob reference
976@param[in] thread current thread number
977@param[in] data blob data to be written
978@param[in] data_len length of blob data to be written (in bytes)
979@return true if successful. */
981 (THD * thd, void *load_ctx, const TABLE *table,
982 Blob_context blob_ctx, unsigned char *blobref, size_t thread,
983 const unsigned char *data, size_t data_len));
984
985/** Close the blob
986@param[in,out] thd session THD
987@param[in,out] load_ctx SE load context returned by begin()
988@param[in] table MySQL TABLE
989@param[in] blob_ctx a blob context object to insert a blob.
990@param[out] blobref buffer to hold blob reference
991@param[in] thread current thread number
992@return true if successful. */
994 (THD * thd, void *load_ctx, const TABLE *table,
995 Blob_context blob_ctx, unsigned char *blobref, size_t thread));
996
997/** End Loading bulk data to SE.
998
999Called at the end of bulk load execution, even if begin or load calls failed.
1000
1001@param[in,out] thd session THD
1002@param[in,out] ctx SE load context
1003@param[in] table MySQL TABLE
1004@param[in] error true, if exiting after error
1005@return true if successful. */
1006DECLARE_METHOD(bool, end,
1007 (THD * thd, void *ctx, const TABLE *table, bool error));
1008
1009/** Check if a table is supported by the bulk load implementation.
1010@param[in,out] thd session THD
1011@param[in] table MySQL TABLE
1012@return true if table is supported. */
1014
1015/** Get available buffer pool memory for bulk load operations.
1016@param[in,out] thd session THD
1017@param[in] table MySQL TABLE
1018@return buffer pool memory available for bulk load. */
1020
Kerberos Client Authentication nullptr
Definition: auth_kerberos_client_plugin.cc:247
constexpr size_t BLOB_REF_SIZE
The blob reference size.
Definition: bulk_data_service.h:52
void * Blob_context
Definition: bulk_data_service.h:49
std::ostream & operator<<(std::ostream &out, const Bulk_load_error_location_details &obj)
Overloading the global output operator to print objects of type Bulk_load_error_location_details.
Definition: bulk_data_service.h:80
Definition: bulk_data_service.h:798
const char * c_str() const
Definition: bulk_data_service.h:807
void KeyTooBig() const override
Called when a JSON object contains a member with a name that is longer than supported by the JSON bin...
Definition: bulk_data_service.h:815
std::string get_error() const
Definition: bulk_data_service.h:809
std::string m_error
Definition: bulk_data_service.h:812
void InternalError(const char *message) const override
Called when an internal error occurs.
Definition: bulk_data_service.h:829
void ValueTooBig() const override
Called when a JSON document is too big to be stored in the JSON binary format.
Definition: bulk_data_service.h:819
void TooDeep() const override
Called when a JSON document has more nesting levels than supported.
Definition: bulk_data_service.h:823
void InvalidJson() const override
Called when an invalid JSON value is encountered.
Definition: bulk_data_service.h:826
bool CheckStack() const override
Check if the stack is about to be exhausted, and report the error.
Definition: bulk_data_service.h:835
Error handler for the functions that serialize a JSON value in the JSON binary storage format.
Definition: json_error_handler.h:49
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:292
bool set_num_rows(size_t n_rows)
Set the number of rows.
Definition: bulk_data_service.h:388
std::vector< Column_type > m_columns
All the columns.
Definition: bulk_data_service.h:419
size_t get_next_row_offset(size_t &offset) const
Get next row offset from current row offset.
Definition: bulk_data_service.h:346
Column_type & get_col(size_t col_offset)
Get column using the column offset.
Definition: bulk_data_service.h:372
bool process_columns(size_t row_index, F &&cbk)
Process all columns, invoking callback for each.
Definition: bulk_data_service.h:309
bool process_columns_by_offset(size_t row_offset, F &&cbk)
Definition: bulk_data_service.h:317
size_t get_num_cols() const
Definition: bulk_data_service.h:302
void reset()
Definition: bulk_data_service.h:329
size_t m_num_rows
Number of rows.
Definition: bulk_data_service.h:422
size_t get_row_offset(size_t row_index) const
Get current row offset to access columns.
Definition: bulk_data_service.h:338
size_t get_num_rows() const
Definition: bulk_data_service.h:299
const Column_type & read_column(size_t row_offset, size_t col_index) const
Get constant column for reading using row offset and column index.
Definition: bulk_data_service.h:378
Column_type & get_col(size_t row_index, size_t col_index)
Get column using row index and column index.
Definition: bulk_data_service.h:365
Row_bunch(size_t n_cols)
Create a new row bunch.
Definition: bulk_data_service.h:296
static const size_t S_MAX_TOTAL_COLS
Limit allocation up to 600M columns.
Definition: bulk_data_service.h:415
size_t m_num_columns
Number of columns in each row.
Definition: bulk_data_service.h:425
Column_type & get_column(size_t row_offset, size_t col_index)
Get column using row offset and column index.
Definition: bulk_data_service.h:355
For each client connection we create a separate thread with THD serving as a thread/connection descriptor.
Definition: sql_lexer_thd.h:36
This file contains the field type.
enum_field_types
Column types for MySQL. Note: Keep include/mysql/components/services/bits/stored_program_bits.h in sync with this enum.
Definition: field_types.h:55
@ MYSQL_TYPE_BOOL
Currently just a placeholder.
Definition: field_types.h:79
@ MYSQL_TYPE_TIME2
Internal to MySQL.
Definition: field_types.h:75
@ MYSQL_TYPE_VARCHAR
Definition: field_types.h:71
@ MYSQL_TYPE_LONGLONG
Definition: field_types.h:64
@ MYSQL_TYPE_LONG_BLOB
Definition: field_types.h:86
@ MYSQL_TYPE_VAR_STRING
Definition: field_types.h:88
@ MYSQL_TYPE_BLOB
Definition: field_types.h:87
@ MYSQL_TYPE_TINY
Definition: field_types.h:57
@ MYSQL_TYPE_TIME
Definition: field_types.h:67
@ MYSQL_TYPE_SET
Definition: field_types.h:83
@ MYSQL_TYPE_NEWDATE
Internal to MySQL.
Definition: field_types.h:70
@ MYSQL_TYPE_VECTOR
Definition: field_types.h:77
@ MYSQL_TYPE_JSON
Definition: field_types.h:80
@ MYSQL_TYPE_STRING
Definition: field_types.h:89
@ MYSQL_TYPE_NULL
Definition: field_types.h:62
@ MYSQL_TYPE_ENUM
Definition: field_types.h:82
@ MYSQL_TYPE_TINY_BLOB
Definition: field_types.h:84
@ MYSQL_TYPE_LONG
Definition: field_types.h:59
@ MYSQL_TYPE_BIT
Definition: field_types.h:72
@ MYSQL_TYPE_INVALID
Definition: field_types.h:78
@ MYSQL_TYPE_GEOMETRY
Definition: field_types.h:90
@ MYSQL_TYPE_NEWDECIMAL
Definition: field_types.h:81
@ MYSQL_TYPE_DECIMAL
Definition: field_types.h:56
@ MYSQL_TYPE_TYPED_ARRAY
Used for replication only.
Definition: field_types.h:76
@ MYSQL_TYPE_DOUBLE
Definition: field_types.h:61
@ MYSQL_TYPE_MEDIUM_BLOB
Definition: field_types.h:85
@ MYSQL_TYPE_DATETIME2
Internal to MySQL.
Definition: field_types.h:74
@ MYSQL_TYPE_SHORT
Definition: field_types.h:58
@ MYSQL_TYPE_DATE
Definition: field_types.h:66
@ MYSQL_TYPE_FLOAT
Definition: field_types.h:60
@ MYSQL_TYPE_TIMESTAMP
Definition: field_types.h:63
@ MYSQL_TYPE_INT24
Definition: field_types.h:65
@ MYSQL_TYPE_DATETIME
Definition: field_types.h:68
@ MYSQL_TYPE_TIMESTAMP2
Definition: field_types.h:73
@ MYSQL_TYPE_YEAR
Definition: field_types.h:69
static int compare_keys(PFS_table_share *pfs, const TABLE_SHARE *share)
Definition: pfs_instr_class.cc:2438
static uint16 key1[1001]
Definition: hp_test2.cc:50
#define F
Definition: jit_executor_value.cc:374
void error(const char *format,...)
int mysql_format_from_raw(char *buffer, size_t buffer_length, const Row_meta &metadata, size_t start_index, size_t &consumed_length, Rows_mysql &sql_rows) noexcept
Definition: bulk_data_service.cc:1818
bool get_table_metadata(THD *thd, const TABLE *table, Table_meta &table_meta) noexcept
Definition: bulk_data_service.cc:2421
int mysql_format(THD *thd, const TABLE *table, const Rows_text &text_rows, size_t &next_index, char *buffer, size_t &buffer_length, const CHARSET_INFO *charset, const Row_meta &metadata, Rows_mysql &sql_rows, Bulk_load_error_location_details &error_details) noexcept
Definition: bulk_data_service.cc:1849
bool get_row_metadata_all(THD *thd, const TABLE *table, bool have_key, std::vector< Row_meta > &row_meta_all) noexcept
Definition: bulk_data_service.cc:2430
int mysql_format_using_key(const Row_meta &metadata, const Rows_mysql &sql_keys, size_t key_offset, Rows_mysql &sql_rows, size_t sql_index) noexcept
Definition: bulk_data_service.cc:1759
bool is_killed(THD *thd) noexcept
Definition: bulk_data_service.cc:1901
bool open_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context &blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2485
size_t get_se_memory_size(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2593
bool write_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread, const unsigned char *data, size_t data_len) noexcept
Definition: bulk_data_service.cc:2495
bool close_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2503
bool is_table_supported(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2597
Definition: bulk_data_service.h:795
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
const std::string charset("charset")
bool load(THD *, const dd::String_type &fname, dd::String_type *buf)
Read an sdi file from disk and store in a buffer.
Definition: sdi_file.cc:308
std::string hex(const Container &c)
Definition: hex.h:61
bool index(const std::string &value, const String &search_for, uint32_t *idx)
Definition: contains.h:75
int key_type
Definition: method.h:38
Definition: aligned_atomic.h:44
ValueType max(X &&first)
Definition: gtid.h:103
const char * begin(const char *const c)
Definition: base64.h:44
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:418
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2872
#define DECLARE_METHOD(retval, name, args)
Declares a method as a part of the Service definition.
Definition: service.h:103
#define END_SERVICE_DEFINITION(name)
A macro to end the last Service definition started with the BEGIN_SERVICE_DEFINITION macro.
Definition: service.h:91
#define BEGIN_SERVICE_DEFINITION(name)
Declares a new Service.
Definition: service.h:86
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:840
std::function< void()> m_fn_begin
Definition: bulk_data_service.h:842
std::function< void()> m_fn_end
Definition: bulk_data_service.h:844
Definition: bulk_data_service.h:54
std::string filename
Definition: bulk_data_service.h:55
std::string m_table_name
Definition: bulk_data_service.h:61
size_t m_bytes
Definition: bulk_data_service.h:62
size_t row_number
Definition: bulk_data_service.h:56
std::string column_input_data
Definition: bulk_data_service.h:59
std::string column_name
Definition: bulk_data_service.h:57
size_t m_column_length
Definition: bulk_data_service.h:63
std::string m_error_mesg
Definition: bulk_data_service.h:60
std::ostream & print(std::ostream &out) const
Definition: bulk_data_service.h:68
std::string column_type
Definition: bulk_data_service.h:58
Definition: m_ctype.h:421
Column metadata information.
Definition: bulk_data_service.h:432
bool m_is_prefix_key
If the key is prefix of the column.
Definition: bulk_data_service.h:490
enum_field_types m_type
Field type.
Definition: bulk_data_service.h:475
std::string m_field_name
Field name.
Definition: bulk_data_service.h:530
std::string get_type_string() const
Get the data type of the column as a string.
Definition: bulk_data_service.h:548
uint16_t m_index
Index of column in row.
Definition: bulk_data_service.h:515
bool m_is_single_byte_len
If character column length can be kept in one byte.
Definition: bulk_data_service.h:506
uint16_t m_null_byte
Byte index in NULL bitmap.
Definition: bulk_data_service.h:521
bool m_is_desc_key
If the key is descending.
Definition: bulk_data_service.h:487
Compare m_compare
Data comparison method used for this column.
Definition: bulk_data_service.h:496
bool m_is_pk
true if column belongs to primary index (key or non-key)
Definition: bulk_data_service.h:481
uint16_t m_fixed_len
The length of column data if fixed.
Definition: bulk_data_service.h:509
std::string to_string() const
Get a string representation of Column_meta object.
Definition: bulk_data_service.h:642
bool is_integer() const
Definition: bulk_data_service.h:461
Compare
Data comparison method.
Definition: bulk_data_service.h:434
uint16_t m_field_index
Position of column in table.
Definition: bulk_data_service.h:518
uint16_t m_max_len
Maximum length of data in bytes.
Definition: bulk_data_service.h:512
bool m_is_fixed_len
If it is fixed length type.
Definition: bulk_data_service.h:493
bool m_is_key
true if column is a key for primary or secondary index.
Definition: bulk_data_service.h:484
uint16_t m_null_bit
Bit number in NULL bitmap.
Definition: bulk_data_service.h:524
bool can_be_stored_externally() const
Based on the column data type check if it can be stored externally.
Definition: bulk_data_service.h:624
bool m_fixed_len_if_set_in_row
Check the row header to find out if it is fixed length.
Definition: bulk_data_service.h:503
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:655
bool m_is_nullable
If column could be NULL.
Definition: bulk_data_service.h:478
bool m_is_part_of_sk
true if this column is part of secondary index.
Definition: bulk_data_service.h:472
bool m_is_unsigned
If it is unsigned integer type.
Definition: bulk_data_service.h:499
std::string get_compare_string() const
Definition: bulk_data_service.h:445
const void * m_charset
Character set for char & varchar columns.
Definition: bulk_data_service.h:527
Definition: bulk_data_service.h:196
uint64_t m_int_data
Column data in integer format.
Definition: bulk_data_service.h:229
std::string to_string() const
Definition: bulk_data_service.h:252
bool m_is_null
If column is NULL.
Definition: bulk_data_service.h:204
char * get_row_begin(const Row_meta &row_meta, size_t col_index) const
Get the pointer to the beginning of the row.
Definition: bulk_data_service.h:786
char * m_data_ptr
Column data or row begin.
Definition: bulk_data_service.h:249
char * get_data() const
Definition: bulk_data_service.h:206
int16_t m_type
Column Data Type.
Definition: bulk_data_service.h:198
uint16_t m_data_len
Column data length.
Definition: bulk_data_service.h:201
void set_data(char *ptr)
Definition: bulk_data_service.h:208
void init()
Definition: bulk_data_service.h:231
void row(char *row_begin)
Save the beginning of the row pointer in this object.
Definition: bulk_data_service.h:213
Definition: bulk_data_service.h:85
bool is_null() const
Check if the column is null by checking for the special length value.
Definition: bulk_data_service.h:102
bool is_ext_relaxed() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:122
bool m_is_ext
If true, the column data is stored externally.
Definition: bulk_data_service.h:149
void set_null()
Mark the column to be null, by setting length to a special value.
Definition: bulk_data_service.h:95
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:178
const char * m_data_ptr
Column data.
Definition: bulk_data_service.h:87
void init()
Initialize the members.
Definition: bulk_data_service.h:134
void set_ext()
Mark that the column data has been stored externally.
Definition: bulk_data_service.h:128
bool is_ext() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:113
size_t m_data_len
Column data length.
Definition: bulk_data_service.h:90
std::string to_string() const
Definition: bulk_data_service.h:152
Definition: mysql.h:300
Row metadata.
Definition: bulk_data_service.h:679
const Column_meta & get_column_meta_index_order(size_t col_index) const
Get the metadata of the given column.
Definition: bulk_data_service.h:703
const Column_meta & get_column_meta(size_t col_index) const
Get the metadata of the column.
Definition: bulk_data_service.h:711
size_t m_n_blob_cols
Number of columns that can be stored externally.
Definition: bulk_data_service.h:748
size_t m_bitmap_length
Total bitmap header length for the row.
Definition: bulk_data_service.h:718
std::string to_string() const
Get a string representation of this Row_meta object.
Definition: bulk_data_service.h:773
size_t m_first_key_len
Length of the first key column.
Definition: bulk_data_service.h:725
size_t m_header_length
Total header length.
Definition: bulk_data_service.h:721
Key_type
Key type for fast comparison.
Definition: bulk_data_service.h:681
uint32_t m_non_keys
Number of columns not used in primary Key.
Definition: bulk_data_service.h:735
uint32_t m_num_columns
Total number of columns.
Definition: bulk_data_service.h:742
uint32_t m_keys
Number of columns used in primary key.
Definition: bulk_data_service.h:732
size_t m_key_length
Key length in bytes for non-integer keys.
Definition: bulk_data_service.h:729
std::string m_name
Name of the key.
Definition: bulk_data_service.h:751
std::vector< Column_meta > m_columns
All columns in a row are arranged with key columns first.
Definition: bulk_data_service.h:690
std::vector< const Column_meta * > m_columns_text_order
All columns in a row arranged as per col_index.
Definition: bulk_data_service.h:693
Key_type m_key_type
Key type for comparison.
Definition: bulk_data_service.h:738
size_t m_approx_row_len
Approximate row length.
Definition: bulk_data_service.h:745
bool is_pk
true if primary key, false if secondary key.
Definition: bulk_data_service.h:754
Definition: table.h:1425
Table metadata.
Definition: bulk_data_service.h:670
size_t m_keynr_pk
Key number of the primary key.
Definition: bulk_data_service.h:675
size_t m_n_keys
Number of keys/indexes the table has.
Definition: bulk_data_service.h:672