MySQL 9.1.0
Source Code Documentation
bulk_data_service.h
Go to the documentation of this file.
1/* Copyright (c) 2022, 2024, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/**
25 @file
26 Services for bulk data conversion and load to SE.
27*/
28
29#pragma once
30
31#include <assert.h>
33#include <stddef.h>
34#include <cstring>
35#include <functional>
36#include <iomanip>
37#include <iostream>
38#include <limits>
39#include <memory>
40#include <sstream>
41#include <string>
42#include <vector>
43#include "field_types.h"
45
46class THD;
47struct TABLE;
48struct CHARSET_INFO;
49using Blob_context = void *;
50
51/** The blob reference size. Refer to lob::ref_t::SIZE or FIELD_REF_SIZE. */
52constexpr size_t BLOB_REF_SIZE = 20;
53
55 std::string filename;
56 size_t row_number;
57 std::string column_name;
58 std::string column_type;
59 std::string column_input_data;
60 std::string m_error_mesg{};
61 std::string m_table_name{};
62 size_t m_bytes;
63
64 std::ostream &print(std::ostream &out) const;
65};
66
68 std::ostream &out) const {
69 out << "[Bulk_load_error_location_details: filename=" << filename
70 << ", column_name=" << column_name << "]";
71 return out;
72}
73
74/** Overloading the global output operator to print objects of type
75Bulk_load_error_location_details.
76@param[in] out output stream
77@param[in] obj object to be printed
78@return given output stream. */
79inline std::ostream &operator<<(std::ostream &out,
81 return obj.print(out);
82}
83
85 /** Column data. */
86 const char *m_data_ptr{};
87
88 /** Column data length. */
89 size_t m_data_len{};
90
91 /** Mark the column to be null, by setting length to a special value. This is
92 only used for columns whose state is maintained across chunks
93 (aka fragmented columns). */
94 void set_null() {
95 assert(m_data_ptr == nullptr);
96 m_data_len = std::numeric_limits<size_t>::max();
97 }
98
99 /** Check if the column is null, by checking special value for length.
100 @return true if the column is null, false otherwise. */
101 bool is_null() const {
102 assert(m_data_len != std::numeric_limits<size_t>::max() ||
103 m_data_ptr == nullptr);
104 return m_data_len == std::numeric_limits<size_t>::max();
105 }
106
107 /** Check if the column data is stored externally. If the data is stored
108 externally, then the data length (m_data_len) would be equal to the
109 BLOB_REF_SIZE and the column data (m_data_ptr) will contain the lob
110 reference.
111 @return true if data is stored externally, false otherwise. */
112 bool is_ext() const {
113 assert(!m_is_ext || m_data_len == BLOB_REF_SIZE);
114 return m_is_ext;
115 }
116
117 /** Check if the column data is stored externally. It is called relaxed,
118 because the column length might not be equal to BLOB_REF_SIZE. Only to
119 be used while the blob is being processed by the CSV parser.
120 @return true if data is stored externally, false otherwise. */
121 bool is_ext_relaxed() const {
122 assert(!m_is_ext || m_data_len >= BLOB_REF_SIZE);
123 return m_is_ext;
124 }
125
126 /** Mark that the column data has been stored externally. */
127 void set_ext() {
128 assert(m_data_len == BLOB_REF_SIZE);
129 m_is_ext = true;
130 }
131
132 /** Initialize the members */
133 void init() {
134 m_data_ptr = nullptr;
135 m_data_len = 0;
136 m_is_ext = false;
137 }
138
139 /** Print this object into the given output stream.
140 @param[in] out output stream into which this object will be printed.
141 @return given output stream */
142 std::ostream &print(std::ostream &out) const;
143
144 std::string to_string() const;
145
146 private:
147 /** If true, the column data is stored externally. */
148 bool m_is_ext{false};
149};
150
151inline std::string Column_text::to_string() const {
153 sout << "[Column_text: len=" << m_data_len;
154 sout << ", val=";
155
156 if (m_data_ptr == nullptr) {
157 sout << "nullptr";
158 } else {
159 for (size_t i = 0; i < m_data_len; ++i) {
160 const char c = m_data_ptr[i];
161 if (isalnum(c)) {
162 sout << c;
163 } else {
164 sout << ".";
165 }
166 }
167 sout << "[hex=";
168 for (size_t i = 0; i < m_data_len; ++i) {
169 sout << std::setfill('0') << std::setw(2) << std::hex
170 << (int)*(&m_data_ptr[i]);
171 }
172 }
173 sout << "]";
174 return sout.str();
175}
176
177inline std::ostream &Column_text::print(std::ostream &out) const {
178 out << "[Column_text: this=" << static_cast<const void *>(this)
179 << ", m_data_ptr=" << static_cast<const void *>(m_data_ptr)
180 << ", m_data_len=" << m_data_len << ", m_is_ext=" << m_is_ext << "]";
181 return out;
182}
183
184/** Overloading the global output operator to print objects of type
185Column_text.
186@param[in] out output stream
187@param[in] obj object to be printed
188@return given output stream. */
189inline std::ostream &operator<<(std::ostream &out, const Column_text &obj) {
190 return obj.print(out);
191}
192
194 /** Column Data Type */
195 int16_t m_type{};
196
197 /** Column data length. */
198 uint16_t m_data_len{};
199
200 /** If column is NULL. */
201 bool m_is_null{false};
202
203 /** Column data */
204 char *m_data_ptr{};
205
206 /** Column data in integer format. Used only for specific datatype. */
207 uint64_t m_int_data;
208
209 void init() {
210 m_type = 0;
211 m_data_len = 0;
212 m_is_null = false;
213 m_data_ptr = nullptr;
214 m_int_data = 0;
215 }
216
217 std::string to_string() const;
218};
219
220inline std::string Column_mysql::to_string() const {
222 sout << "[Column_mysql: len=" << m_data_len;
223 sout << ", val=";
224
225 switch (m_type) {
226 case MYSQL_TYPE_LONG: {
227 sout << m_int_data;
228 } break;
229 default: {
230 for (size_t i = 0; i < m_data_len; ++i) {
231 const char c = m_data_ptr[i];
232 if (isalnum(c)) {
233 sout << c;
234 } else {
235 sout << ".";
236 }
237 }
238
239 } break;
240 }
241 if (m_type != MYSQL_TYPE_LONG) {
242 sout << "[hex=";
243 for (size_t i = 0; i < m_data_len; ++i) {
244 sout << std::setfill('0') << std::setw(2) << std::hex
245 << (int)*(&m_data_ptr[i]);
246 }
247 sout << "]";
248 }
249 return sout.str();
250}
251
252/** Implements the row and column memory management for parse and load
253operations. We try to pre-allocate the memory contiguously as much as we can
254to maximize the performance.
255
256@tparam Column_type Column_text when used in the CSV context, Column_mysql
257when used in the InnoDB context.
258*/
259template <typename Column_type>
261 public:
262 /** Create a new row bunch.
263 @param[in] n_cols number of columns */
264 Row_bunch(size_t n_cols) : m_num_columns(n_cols) {}
265
266 /** @return return number of rows in the bunch. */
267 size_t get_num_rows() const { return m_num_rows; }
268
269 /** @return return number of columns in each row. */
270 size_t get_num_cols() const { return m_num_columns; }
271
272 /** Process all columns, invoking callback for each.
273 @param[in] row_index index of the row
274 @param[in] cbk callback function
275 @return true if successful */
276 template <typename F>
277 bool process_columns(size_t row_index, F &&cbk) {
278 assert(row_index < m_num_rows);
279
280 auto row_offset = row_index * m_num_columns;
281 return process_columns_by_offset(row_offset, std::move(cbk));
282 }
283
284 template <typename F>
285 bool process_columns_by_offset(size_t row_offset, F &&cbk) {
286 assert(row_offset + m_num_columns <= m_columns.size());
287
288 for (size_t index = 0; index < m_num_columns; ++index) {
289 bool last_col = (index == m_num_columns - 1);
290 if (!cbk(m_columns[row_offset + index], last_col)) {
291 return false;
292 }
293 }
294 return true;
295 }
296
297 void reset() {
298 for (auto &col : m_columns) {
299 col.init();
300 }
301 }
302
303 /** Get current row offset to access columns.
304 @param[in] row_index row index
305 @return row offset in column vector. */
306 size_t get_row_offset(size_t row_index) const {
307 assert(row_index < m_num_rows);
308 return row_index * m_num_columns;
309 }
310
311 /** Get next row offset from current row offset.
312 @param[in,out] offset row offset
313 @return true if there is a next row. */
314 size_t get_next_row_offset(size_t &offset) const {
315 offset += m_num_columns;
316 return (offset < m_columns.size());
317 }
318
319 /** Get column using row offset and column index.
320 @param[in] row_offset row offset in column vector
321 @param[in] col_index index of the column within row
322 @return column data */
323 Column_type &get_column(size_t row_offset, size_t col_index) {
324 assert(col_index < m_num_columns);
325 assert(row_offset + col_index < m_columns.size());
326 return m_columns[row_offset + col_index];
327 }
328
329 /** Get column using row index and column index.
330 @param[in] row_index index of the row in the bunch
331 @param[in] col_index index of the column within row
332 @return column data */
333 Column_type &get_col(size_t row_index, size_t col_index) {
334 return get_column(get_row_offset(row_index), col_index);
335 }
336
337 /** Get column using the column offset.
338 @param[in] col_offset column offset
339 @return column data */
340 Column_type &get_col(size_t col_offset) { return m_columns[col_offset]; }
341
342 /** Get constant column for reading using row offset and column index.
343 @param[in] row_offset row offset in column vector
344 @param[in] col_index index of the column within row
345 @return column data */
346 const Column_type &read_column(size_t row_offset, size_t col_index) const {
347 assert(col_index < m_num_columns);
348 assert(row_offset + col_index < m_columns.size());
349 return m_columns[row_offset + col_index];
350 }
351
352 /** Set the number of rows. Adjust number of rows base on maximum column
353 storage limit.
354 @param[in,out] n_rows number of rows
355 @return true if successful, false if too many rows or columns. */
356 bool set_num_rows(size_t n_rows) {
357 /* Avoid any overflow during multiplication. */
358 if (n_rows > std::numeric_limits<uint32_t>::max() ||
359 m_num_columns > std::numeric_limits<uint32_t>::max()) {
360 return false;
361 }
362 auto total_cols = (uint64_t)n_rows * m_num_columns;
363
364 if (total_cols > S_MAX_TOTAL_COLS) {
365 return false;
366 }
367
368 m_num_rows = n_rows;
369
370 /* Extend columns if needed. */
371 if (m_columns.size() < total_cols) {
372 m_columns.resize(total_cols);
373 }
374 return true;
375 }
376
377 /** Limit allocation up to 600M columns. This number is rounded up from an
378 * estimate of the number of columns with the max chunk size (1024M). In the
379 * worst case we can have 2 bytes per column so a chunk can contain around
380 * 512M columns, and because of rows that spill over chunk boundaries we
381 * assume we can append a full additional row (which should have at most
382 * 4096 columns). Rounded up to 600M. */
383 const static size_t S_MAX_TOTAL_COLS = 600 * 1024 * 1024;
384
385 private:
386 /** All the columns. */
387 std::vector<Column_type> m_columns;
388
389 /** Number of rows. */
390 size_t m_num_rows{};
391
392 /** Number of columns in each row. */
394};
395
398
399/** Column metadata information. */
401 /** Data comparison method. */
402 enum class Compare {
403 /* Integer comparison */
405 /* Unsigned Integer comparison */
407 /* Binary comparison (memcmp) */
408 BINARY,
409 /* Need to callback to use appropriate comparison function in server. */
410 MYSQL
411 };
412
413 /** @return true if integer type. */
414 bool is_integer() const {
417 }
418
419 /** Based on the column data type check if it can be stored externally.
420 @return true if the column data can be stored externally
421 @return false if the column data cannot be stored externally */
422 bool can_be_stored_externally() const;
423
424 /** Field type. (@ref enum_field_types) */
426
427 /** If column could be NULL. */
429
430 /** If column is part of primary key. */
432
433 /** If the key is descending. */
435
436 /** If the key is prefix of the column. */
438
439 /** If it is fixed length type. */
441
442 /** If it is integer type. */
444
445 /** If it is unsigned integer type. */
447
448 /** Check the row header to find out if it is fixed length. For
449 character data type the row header indicates fixed length. */
451
452 /** If character column length can be kept in one byte. */
454
455 /** The length of column data if fixed. */
456 uint16_t m_fixed_len;
457
458 /** Maximum length of data in bytes. */
459 uint16_t m_max_len;
460
461 /** Index of column in row. */
462 uint16_t m_index;
463
464 /** Byte index in NULL bitmap. */
465 uint16_t m_null_byte;
466
467 /** BIT number in NULL bitmap. */
468 uint16_t m_null_bit;
469
470 /** Character set for char & varchar columns. */
471 const void *m_charset;
472
473 /** Field name */
474 std::string m_field_name;
475
476 /** Print this object into the given output stream.
477 @param[in] out output stream into which object will be printed
478 @return given output stream. */
479 std::ostream &print(std::ostream &out) const;
480
481 std::string get_type_string() const;
482};
483
484inline std::string Column_meta::get_type_string() const {
485 switch (m_type) {
487 return "decimal";
488 case MYSQL_TYPE_TINY:
489 return "tiny";
490 case MYSQL_TYPE_SHORT:
491 return "short";
492 case MYSQL_TYPE_LONG:
493 return "long";
494 case MYSQL_TYPE_FLOAT:
495 return "float";
497 return "double";
498 case MYSQL_TYPE_NULL:
499 return "null";
501 return "timestamp";
503 return "longlong";
504 case MYSQL_TYPE_INT24:
505 return "int";
506 case MYSQL_TYPE_DATE:
507 return "date";
508 case MYSQL_TYPE_TIME:
509 return "time";
511 return "datetime";
512 case MYSQL_TYPE_YEAR:
513 return "year";
515 return "date";
517 return "varchar";
518 case MYSQL_TYPE_BIT:
519 return "bit";
521 return "timestamp";
523 return "datetime";
524 case MYSQL_TYPE_TIME2:
525 return "time";
527 return "typed_array";
529 return "vector";
531 return "invalid";
532 case MYSQL_TYPE_BOOL:
533 return "bool";
534 case MYSQL_TYPE_JSON:
535 return "json";
537 return "decimal";
538 case MYSQL_TYPE_ENUM:
539 return "enum";
540 case MYSQL_TYPE_SET:
541 return "set";
543 return "tiny_blob";
545 return "medium_blob";
547 return "long_blob";
548 case MYSQL_TYPE_BLOB:
549 return "blob";
551 return "var_string";
553 return "string";
555 return "geometry";
556 }
557 return "invalid";
558}
559
561 switch (m_type) {
562 case MYSQL_TYPE_JSON:
566 case MYSQL_TYPE_BLOB:
569 return true;
570 }
571 default:
572 break;
573 }
574 return false;
575}
576
577inline std::ostream &Column_meta::print(std::ostream &out) const {
578 out << "[Column_meta: m_is_single_byte_len=" << m_is_single_byte_len
579 << ", m_is_fixed_len=" << m_is_fixed_len
580 << ", m_fixed_len=" << m_fixed_len << "]";
581 return out;
582}
583
584/** Overloading the global output operator to print objects of type
585Column_meta.
586@param[in] out output stream
587@param[in] obj object to be printed
588@return given output stream. */
589inline std::ostream &operator<<(std::ostream &out, const Column_meta &obj) {
590 return obj.print(out);
591}
592
593/** Row metadata */
594struct Row_meta {
595 /** Key type for fast comparison. */
596 enum class Key_type {
597 /* All keys are signed integers and ascending. */
599 /* All keys are integer. */
600 INT,
601 /* Keys are of any supported type. */
602 ANY
603 };
604 /** All columns in a row are arranged with key columns first. */
605 std::vector<Column_meta> m_columns;
606
607 /** All columns in a row arranged as per col_index. */
608 std::vector<const Column_meta *> m_columns_text_order;
609
610 /** Get the meta data of the column.
611 @param[in] col_index the index of the column as it appears in CSV file.
612 @return a reference to the column meta data.*/
613 const Column_meta &get_column_meta(size_t col_index) const {
614 assert(col_index < m_columns_text_order.size());
615 assert(col_index == m_columns_text_order[col_index]->m_index);
616 return *m_columns_text_order[col_index];
617 }
618
619 /** Total bitmap header length for the row. */
620 size_t m_bitmap_length = 0;
621
622 /** Total header length. */
623 size_t m_header_length = 0;
624
625 /** Length of the first key column. Helps to get the row pointer from first
626 key data pointer. */
627 size_t m_first_key_len = 0;
628
629 /** Key length in bytes for non-integer keys. This is required to estimate
630 the space required to save keys. */
631 size_t m_key_length = 0;
632
633 /** Number of columns used in primary key. */
634 uint32_t m_keys = 0;
635
636 /** Number of columns not used in primary Key. */
637 uint32_t m_non_keys = 0;
638
639 /** Key type for comparison. */
641
642 /** Total number of columns. A key could be on a column prefix.
643 m_num_columns <= m_keys + m_non_keys */
644 uint32_t m_num_columns = 0;
645
646 /** Approximate row length. */
648
649 /** Number of columns that can be stored externally. */
650 size_t m_n_blob_cols{0};
651};
652
653namespace Bulk_load {
654
657 public:
658 void KeyTooBig() const override;
659 void ValueTooBig() const override;
660 void TooDeep() const override;
661 void InvalidJson() const override;
662 void InternalError(const char *message) const override;
663 bool CheckStack() const override;
664
665 const char *c_str() const { return m_error.c_str(); }
666
667 std::string get_error() const { return m_error; }
668
669 private:
670 mutable std::string m_error{};
671};
672
674 m_error = "Key is too big";
675}
676
678 m_error = "Value is too big";
679}
680
682 m_error = "JSON document has more nesting levels than supported";
683}
685 m_error = "Invalid JSON value is encountered";
686}
688 const char *message [[maybe_unused]]) const {
689 m_error = message;
690 m_error += " (Internal Error)";
691}
692
694 return false;
695}
696
697/** Callbacks for collecting time statistics */
699 /* Operation begin. */
700 std::function<void()> m_fn_begin;
701 /* Operation end. */
702 std::function<void()> m_fn_end;
703};
704
705} // namespace Bulk_load
706
707/** Bulk Data conversion. */
708BEGIN_SERVICE_DEFINITION(bulk_data_convert)
709/** Convert rows from text format to MySQL column format. Convert as many
710rows as possible, consuming the data buffer starting from next_index. On
711output, next_index is the next row index that is not yet consumed. If it
712matches the size of input text_rows, then all rows are consumed.
713@param[in,out] thd session THD
714@param[in] table MySQL TABLE
715@param[in] text_rows rows with column in text
716@param[in,out] next_index next_index in text_rows to be processed
717@param[in,out] buffer data buffer for keeping sql row data
718@param[in,out] buffer_length length of the data buffer
719@param[in] charset input row data character set
720@param[in] metadata row metadata
721@param[out] sql_rows rows with column in MySQL column format
722@return error code. */
724 (THD * thd, const TABLE *table, const Rows_text &text_rows,
725 size_t &next_index, char *buffer, size_t &buffer_length,
726 const CHARSET_INFO *charset, const Row_meta &metadata,
727 Rows_mysql &sql_rows,
729
730/** Convert row to MySQL column format from raw form
731@param[in,out] buffer input raw data buffer
732@param[in] buffer_length buffer length
733@param[in] metadata row metadata
734@param[in] start_index start row index in row bunch
735@param[out] consumed_length length of buffer consumed
736@param[in,out] sql_rows row bunch to fill data
737@return error code. */
739 (char *buffer, size_t buffer_length, const Row_meta &metadata,
740 size_t start_index, size_t &consumed_length,
741 Rows_mysql &sql_rows));
742
743/** Convert row to MySQL column format using the key
744@param[in] metadata row metadata
745@param[in] sql_keys Key bunch
746@param[in] key_offset offset for the key
747@param[in,out] sql_rows row bunch to fill data
748@param[in] sql_index index of the row to be filled
749@return error code. */
751 (const Row_meta &metadata, const Rows_mysql &sql_keys,
752 size_t key_offset, Rows_mysql &sql_rows, size_t sql_index));
753
754/** Check if session is interrupted.
755@param[in,out] thd session THD
756@return true if connection or statement is killed. */
758
759/** Compare two key columns
760@param[in] key1 first key
761@param[in] key2 second key
762@param[in] col_meta column meta information
763@return positive, 0, negative, if key1 is greater than, equal to, less than key2 */
765 (const Column_mysql &key1, const Column_mysql &key2,
766 const Column_meta &col_meta));
767
768/** Get Table row metadata.
769@param[in,out] thd session THD
770@param[in] table MySQL TABLE
771@param[in] have_key include Primary Key metadata
772@param[out] metadata Metadata
773@return true if successful. */
775 (THD * thd, const TABLE *table, bool have_key,
776 Row_meta &metadata));
777
778END_SERVICE_DEFINITION(bulk_data_convert)
779
781/** Bulk data load to SE. */
783/** Begin Loading bulk data to SE.
784@param[in,out] thd session THD
785@param[in] table MySQL TABLE
786@param[in] data_size total data size to load
787@param[in] memory SE memory to be used
788@param[in] num_threads Number of concurrent threads
789@return SE bulk load context or nullptr in case of an error. */
790DECLARE_METHOD(void *, begin,
791 (THD * thd, const TABLE *table, size_t data_size, size_t memory,
792 size_t num_threads));
793
794/** Load a set of rows to SE table by one thread.
795@param[in,out] thd session THD
796@param[in,out] ctx SE load context returned by begin()
797@param[in] table MySQL TABLE
798@param[in] sql_rows row data to load
799@param[in] thread current thread number
800@param[in] wait_cbks wait stat callbacks
801@return true if successful. */
802DECLARE_METHOD(bool, load,
803 (THD * thd, void *ctx, const TABLE *table,
804 const Rows_mysql &sql_rows, size_t thread,
805 Bulk_load::Stat_callbacks &wait_cbks));
806
807/** Create a blob context object to insert a blob.
808@param[in,out] thd session THD
809@param[in,out] load_ctx SE load context returned by begin()
810@param[in] table MySQL TABLE
811@param[out] blob_ctx a blob context object to insert a blob.
812@param[out] blobref buffer to hold blob reference
813@param[in] thread current thread number
814@return true if successful. */
816 (THD * thd, void *load_ctx, const TABLE *table,
817 Blob_context &blob_ctx, unsigned char *blobref, size_t thread));
818
819/** Write data into a blob
820@param[in,out] thd session THD
821@param[in,out] load_ctx SE load context returned by begin()
822@param[in] table MySQL TABLE
823@param[in] blob_ctx a blob context object to insert a blob.
824@param[out] blobref buffer to hold blob reference
825@param[in] thread current thread number
826@param[in] data blob data to be written
827@param[in] data_len length of blob data to be written (in bytes)
828@return true if successful. */
830 (THD * thd, void *load_ctx, const TABLE *table,
831 Blob_context blob_ctx, unsigned char *blobref, size_t thread,
832 const unsigned char *data, size_t data_len));
833
834/** Close the blob
835@param[in,out] thd session THD
836@param[in,out] load_ctx SE load context returned by begin()
837@param[in] table MySQL TABLE
838@param[in] blob_ctx a blob context object to insert a blob.
839@param[out] blobref buffer to hold blob reference
840@param[in] thread current thread number
841@return true if successful. */
843 (THD * thd, void *load_ctx, const TABLE *table,
844 Blob_context blob_ctx, unsigned char *blobref, size_t thread));
845
846/** End Loading bulk data to SE.
847
848Called at the end of bulk load execution, even if begin or load calls failed.
849
850@param[in,out] thd session THD
851@param[in,out] ctx SE load context
852@param[in] table MySQL TABLE
853@param[in] error true, if exiting after error
854@return true if successful. */
855DECLARE_METHOD(bool, end,
856 (THD * thd, void *ctx, const TABLE *table, bool error));
857
858/** Check if a table is supported by the bulk load implementation.
859@param[in,out] thd session THD
860@param[in] table MySQL TABLE
861@return true if table is supported. */
863
864/** Get available buffer pool memory for bulk load operations.
865@param[in,out] thd session THD
866@param[in] table MySQL TABLE
867@return buffer pool memory available for bulk load. */
869
constexpr size_t BLOB_REF_SIZE
The blob reference size.
Definition: bulk_data_service.h:52
void * Blob_context
Definition: bulk_data_service.h:49
std::ostream & operator<<(std::ostream &out, const Bulk_load_error_location_details &obj)
Overloading the global output operator to print objects of type Bulk_load_error_location_details.
Definition: bulk_data_service.h:79
Definition: bulk_data_service.h:656
const char * c_str() const
Definition: bulk_data_service.h:665
void KeyTooBig() const override
Called when a JSON object contains a member with a name that is longer than supported by the JSON bin...
Definition: bulk_data_service.h:673
std::string get_error() const
Definition: bulk_data_service.h:667
std::string m_error
Definition: bulk_data_service.h:670
void InternalError(const char *message) const override
Called when an internal error occurs.
Definition: bulk_data_service.h:687
void ValueTooBig() const override
Called when a JSON document is too big to be stored in the JSON binary format.
Definition: bulk_data_service.h:677
void TooDeep() const override
Called when a JSON document has more nesting levels than supported.
Definition: bulk_data_service.h:681
void InvalidJson() const override
Called when an invalid JSON value is encountered.
Definition: bulk_data_service.h:684
bool CheckStack() const override
Check if the stack is about to be exhausted, and report the error.
Definition: bulk_data_service.h:693
Error handler for the functions that serialize a JSON value in the JSON binary storage format.
Definition: json_error_handler.h:49
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:260
bool set_num_rows(size_t n_rows)
Set the number of rows.
Definition: bulk_data_service.h:356
std::vector< Column_type > m_columns
All the columns.
Definition: bulk_data_service.h:387
size_t get_next_row_offset(size_t &offset) const
Get next row offset from current row offset.
Definition: bulk_data_service.h:314
Column_type & get_col(size_t col_offset)
Get column using the column offset.
Definition: bulk_data_service.h:340
bool process_columns(size_t row_index, F &&cbk)
Process all columns, invoking callback for each.
Definition: bulk_data_service.h:277
bool process_columns_by_offset(size_t row_offset, F &&cbk)
Definition: bulk_data_service.h:285
size_t get_num_cols() const
Definition: bulk_data_service.h:270
void reset()
Definition: bulk_data_service.h:297
size_t m_num_rows
Number of rows.
Definition: bulk_data_service.h:390
size_t get_row_offset(size_t row_index) const
Get current row offset to access columns.
Definition: bulk_data_service.h:306
size_t get_num_rows() const
Definition: bulk_data_service.h:267
const Column_type & read_column(size_t row_offset, size_t col_index) const
Get constant column for reading using row offset and column index.
Definition: bulk_data_service.h:346
Column_type & get_col(size_t row_index, size_t col_index)
Get column using row index and column index.
Definition: bulk_data_service.h:333
Row_bunch(size_t n_cols)
Create a new row bunch.
Definition: bulk_data_service.h:264
static const size_t S_MAX_TOTAL_COLS
Limit allocation up to 600M columns.
Definition: bulk_data_service.h:383
size_t m_num_columns
Number of columns in each row.
Definition: bulk_data_service.h:393
Column_type & get_column(size_t row_offset, size_t col_index)
Get column using row offset and column index.
Definition: bulk_data_service.h:323
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
This file contains the field type.
enum_field_types
Column types for MySQL Note: Keep include/mysql/components/services/bits/stored_program_bits....
Definition: field_types.h:55
@ MYSQL_TYPE_BOOL
Currently just a placeholder.
Definition: field_types.h:79
@ MYSQL_TYPE_TIME2
Internal to MySQL.
Definition: field_types.h:75
@ MYSQL_TYPE_VARCHAR
Definition: field_types.h:71
@ MYSQL_TYPE_LONGLONG
Definition: field_types.h:64
@ MYSQL_TYPE_LONG_BLOB
Definition: field_types.h:86
@ MYSQL_TYPE_VAR_STRING
Definition: field_types.h:88
@ MYSQL_TYPE_BLOB
Definition: field_types.h:87
@ MYSQL_TYPE_TINY
Definition: field_types.h:57
@ MYSQL_TYPE_TIME
Definition: field_types.h:67
@ MYSQL_TYPE_SET
Definition: field_types.h:83
@ MYSQL_TYPE_NEWDATE
Internal to MySQL.
Definition: field_types.h:70
@ MYSQL_TYPE_VECTOR
Definition: field_types.h:77
@ MYSQL_TYPE_JSON
Definition: field_types.h:80
@ MYSQL_TYPE_STRING
Definition: field_types.h:89
@ MYSQL_TYPE_NULL
Definition: field_types.h:62
@ MYSQL_TYPE_ENUM
Definition: field_types.h:82
@ MYSQL_TYPE_TINY_BLOB
Definition: field_types.h:84
@ MYSQL_TYPE_LONG
Definition: field_types.h:59
@ MYSQL_TYPE_BIT
Definition: field_types.h:72
@ MYSQL_TYPE_INVALID
Definition: field_types.h:78
@ MYSQL_TYPE_GEOMETRY
Definition: field_types.h:90
@ MYSQL_TYPE_NEWDECIMAL
Definition: field_types.h:81
@ MYSQL_TYPE_DECIMAL
Definition: field_types.h:56
@ MYSQL_TYPE_TYPED_ARRAY
Used for replication only.
Definition: field_types.h:76
@ MYSQL_TYPE_DOUBLE
Definition: field_types.h:61
@ MYSQL_TYPE_MEDIUM_BLOB
Definition: field_types.h:85
@ MYSQL_TYPE_DATETIME2
Internal to MySQL.
Definition: field_types.h:74
@ MYSQL_TYPE_SHORT
Definition: field_types.h:58
@ MYSQL_TYPE_DATE
Definition: field_types.h:66
@ MYSQL_TYPE_FLOAT
Definition: field_types.h:60
@ MYSQL_TYPE_TIMESTAMP
Definition: field_types.h:63
@ MYSQL_TYPE_INT24
Definition: field_types.h:65
@ MYSQL_TYPE_DATETIME
Definition: field_types.h:68
@ MYSQL_TYPE_TIMESTAMP2
Definition: field_types.h:73
@ MYSQL_TYPE_YEAR
Definition: field_types.h:69
static int compare_keys(PFS_table_share *pfs, const TABLE_SHARE *share)
Definition: pfs_instr_class.cc:2438
static uint16 key1[1001]
Definition: hp_test2.cc:50
void error(const char *format,...)
int mysql_format_from_raw(char *buffer, size_t buffer_length, const Row_meta &metadata, size_t start_index, size_t &consumed_length, Rows_mysql &sql_rows) noexcept
Definition: bulk_data_service.cc:1710
int mysql_format(THD *thd, const TABLE *table, const Rows_text &text_rows, size_t &next_index, char *buffer, size_t &buffer_length, const CHARSET_INFO *charset, const Row_meta &metadata, Rows_mysql &sql_rows, Bulk_load_error_location_details &error_details) noexcept
Definition: bulk_data_service.cc:1739
bool get_row_metadata(THD *, const TABLE *table, bool have_key, Row_meta &metadata) noexcept
Definition: bulk_data_service.cc:1964
int mysql_format_using_key(const Row_meta &metadata, const Rows_mysql &sql_keys, size_t key_offset, Rows_mysql &sql_rows, size_t sql_index) noexcept
Definition: bulk_data_service.cc:1689
bool is_killed(THD *thd) noexcept
Definition: bulk_data_service.cc:1791
bool open_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context &blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2147
size_t get_se_memory_size(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2255
bool write_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread, const unsigned char *data, size_t data_len) noexcept
Definition: bulk_data_service.cc:2157
bool close_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2165
bool is_table_supported(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2259
Definition: bulk_data_service.h:653
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
const std::string charset("charset")
bool load(THD *, const dd::String_type &fname, dd::String_type *buf)
Read an sdi file from disk and store in a buffer.
Definition: sdi_file.cc:308
Definition: aligned_atomic.h:44
const char * begin(const char *const c)
Definition: base64.h:44
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:418
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2872
#define DECLARE_METHOD(retval, name, args)
Declares a method as a part of the Service definition.
Definition: service.h:103
#define END_SERVICE_DEFINITION(name)
A macro to end the last Service definition started with the BEGIN_SERVICE_DEFINITION macro.
Definition: service.h:91
#define BEGIN_SERVICE_DEFINITION(name)
Declares a new Service.
Definition: service.h:86
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:698
std::function< void()> m_fn_begin
Definition: bulk_data_service.h:700
std::function< void()> m_fn_end
Definition: bulk_data_service.h:702
Definition: bulk_data_service.h:54
std::string filename
Definition: bulk_data_service.h:55
std::string m_table_name
Definition: bulk_data_service.h:61
size_t m_bytes
Definition: bulk_data_service.h:62
size_t row_number
Definition: bulk_data_service.h:56
std::string column_input_data
Definition: bulk_data_service.h:59
std::string column_name
Definition: bulk_data_service.h:57
std::string m_error_mesg
Definition: bulk_data_service.h:60
std::ostream & print(std::ostream &out) const
Definition: bulk_data_service.h:67
std::string column_type
Definition: bulk_data_service.h:58
Definition: m_ctype.h:421
Column metadata information.
Definition: bulk_data_service.h:400
bool m_is_prefix_key
If the key is prefix of the column.
Definition: bulk_data_service.h:437
enum_field_types m_type
Field type.
Definition: bulk_data_service.h:425
std::string m_field_name
Field name.
Definition: bulk_data_service.h:474
std::string get_type_string() const
Definition: bulk_data_service.h:484
uint16_t m_index
Index of column in row.
Definition: bulk_data_service.h:462
bool m_is_single_byte_len
If character column length can be kept in one byte.
Definition: bulk_data_service.h:453
uint16_t m_null_byte
Byte index in NULL bitmap.
Definition: bulk_data_service.h:465
bool m_is_desc_key
If the key is descending.
Definition: bulk_data_service.h:434
Compare m_compare
Data comparison method used for this column.
Definition: bulk_data_service.h:443
uint16_t m_fixed_len
The length of column data if fixed.
Definition: bulk_data_service.h:456
bool is_integer() const
Definition: bulk_data_service.h:414
Compare
Data comparison method.
Definition: bulk_data_service.h:402
uint16_t m_max_len
Maximum length of data in bytes.
Definition: bulk_data_service.h:459
bool m_is_fixed_len
If it is fixed length type.
Definition: bulk_data_service.h:440
bool m_is_key
If column is part of primary key.
Definition: bulk_data_service.h:431
uint16_t m_null_bit
Bit number in NULL bitmap.
Definition: bulk_data_service.h:468
bool can_be_stored_externally() const
Based on the column data type check if it can be stored externally.
Definition: bulk_data_service.h:560
bool m_fixed_len_if_set_in_row
Check the row header to find out if it is fixed length.
Definition: bulk_data_service.h:450
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:577
bool m_is_nullable
If column could be NULL.
Definition: bulk_data_service.h:428
bool m_is_unsigned
If it is unsigned integer type.
Definition: bulk_data_service.h:446
const void * m_charset
Character set for char & varchar columns.
Definition: bulk_data_service.h:471
Definition: bulk_data_service.h:193
uint64_t m_int_data
Column data in integer format.
Definition: bulk_data_service.h:207
std::string to_string() const
Definition: bulk_data_service.h:220
bool m_is_null
If column is NULL.
Definition: bulk_data_service.h:201
char * m_data_ptr
Column data.
Definition: bulk_data_service.h:204
int16_t m_type
Column Data Type.
Definition: bulk_data_service.h:195
uint16_t m_data_len
Column data length.
Definition: bulk_data_service.h:198
void init()
Definition: bulk_data_service.h:209
Definition: bulk_data_service.h:84
bool is_null() const
Check if the column is null by checking for the special length value.
Definition: bulk_data_service.h:101
bool is_ext_relaxed() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:121
bool m_is_ext
If true, the column data is stored externally.
Definition: bulk_data_service.h:148
void set_null()
Mark the column as null by setting its length to a special value.
Definition: bulk_data_service.h:94
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:177
const char * m_data_ptr
Column data.
Definition: bulk_data_service.h:86
void init()
Initialize the members.
Definition: bulk_data_service.h:133
void set_ext()
Mark that the column data has been stored externally.
Definition: bulk_data_service.h:127
bool is_ext() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:112
size_t m_data_len
Column data length.
Definition: bulk_data_service.h:89
std::string to_string() const
Definition: bulk_data_service.h:151
Definition: mysql.h:300
Row metadata.
Definition: bulk_data_service.h:594
const Column_meta & get_column_meta(size_t col_index) const
Get the meta data of the column.
Definition: bulk_data_service.h:613
size_t m_n_blob_cols
Number of columns that can be stored externally.
Definition: bulk_data_service.h:650
size_t m_bitmap_length
Total bitmap header length for the row.
Definition: bulk_data_service.h:620
size_t m_first_key_len
Length of the first key column.
Definition: bulk_data_service.h:627
size_t m_header_length
Total header length.
Definition: bulk_data_service.h:623
Key_type
Key type for fast comparison.
Definition: bulk_data_service.h:596
uint32_t m_non_keys
Number of columns not used in primary Key.
Definition: bulk_data_service.h:637
uint32_t m_num_columns
Total number of columns.
Definition: bulk_data_service.h:644
uint32_t m_keys
Number of columns used in primary key.
Definition: bulk_data_service.h:634
size_t m_key_length
Key length in bytes for non-integer keys.
Definition: bulk_data_service.h:631
std::vector< Column_meta > m_columns
All columns in a row are arranged with key columns first.
Definition: bulk_data_service.h:605
std::vector< const Column_meta * > m_columns_text_order
All columns in a row arranged as per col_index.
Definition: bulk_data_service.h:608
Key_type m_key_type
Key type for comparison.
Definition: bulk_data_service.h:640
size_t m_approx_row_len
Approximate row length.
Definition: bulk_data_service.h:647
Definition: table.h:1421