MySQL 9.6.0
Source Code Documentation
bulk_data_service.h
Go to the documentation of this file.
1/* Copyright (c) 2022, 2025, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/**
25 @file
26 Services for bulk data conversion and load to SE.
27*/
28
29#pragma once
30
31#include <assert.h>
33#include <stddef.h>
34#include <atomic>
35#include <cstdint>
36#include <cstring>
37#include <functional>
38#include <iomanip>
39#include <iostream>
40#include <limits>
41#include <memory>
42#include <mutex>
43#include <optional>
44#include <sstream>
45#include <string>
46#include <vector>
47#include "field_types.h"
49
50class THD;
51struct TABLE;
52struct CHARSET_INFO;
53using Blob_context = void *;
54
55/** The blob reference size. Refer to lob::ref_t::SIZE or FIELD_REF_SIZE. */
56constexpr size_t BLOB_REF_SIZE = 20;
57
59 std::string filename;
60 size_t row_number;
61 std::string column_name;
62 std::string column_type;
63 std::string column_input_data;
64 std::string m_error_mesg{};
65 std::string m_table_name{};
66 size_t m_bytes;
68
69 std::ostream &print(std::ostream &out) const;
70};
71
73 std::ostream &out) const {
74 out << "[Bulk_load_error_location_details: filename=" << filename
75 << ", column_name=" << column_name << "]";
76 return out;
77}
78
79/** Overloading the global output operator to print objects of type
80Bulk_load_error_location_details.
81@param[in] out output stream
82@param[in] obj object to be printed
83@return given output stream. */
84inline std::ostream &operator<<(std::ostream &out,
86 return obj.print(out);
87}
88
90 /** Column data. */
91 const char *m_data_ptr{};
92
93 /** Column data length. */
94 size_t m_data_len{};
95
96 /** Check if it is DB_ROW_ID column based on the value it contains.
97 @return true if it is DB_ROW_ID column, false otherwise */
98 bool is_row_id() const { return m_row_id != UINT64_MAX; }
99
100 /** The generated DB_ROW_ID value */
101 uint64_t m_row_id{UINT64_MAX};
102
103 /** Mark the column to be null, by setting length to a special value. This is
104 only used for columns whose state is maintained across chunks
105 (aka fragmented columns). */
106 void set_null() {
107 assert(m_data_ptr == nullptr);
109 }
110
111 /** Check if the column is null, by checking special value for length.
112 @return true if the column is null, false otherwise. */
113 bool is_null() const {
115 m_data_ptr == nullptr);
117 }
118
119 /** Check if the column data is stored externally. If the data is stored
120 externally, then the data length (m_data_len) would be equal to the
121 BLOB_REF_SIZE and the column data (m_data_ptr) will contain the lob
122 reference.
123 @return true if data is stored externally, false otherwise. */
124 bool is_ext() const {
125 assert(!m_is_ext || m_data_len == BLOB_REF_SIZE);
126 return m_is_ext;
127 }
128
129 /** Check if the column data is stored externally. It is called relaxed,
130 because the column length might not be equal to BLOB_REF_SIZE. Only to
131 be used while the blob is being processed by the CSV parser.
132 @return true if data is stored externally, false otherwise. */
133 bool is_ext_relaxed() const {
134 assert(!m_is_ext || m_data_len >= BLOB_REF_SIZE);
135 return m_is_ext;
136 }
137
138 /** Mark that the column data has been stored externally. */
139 void set_ext() {
140 assert(m_data_len == BLOB_REF_SIZE);
141 m_is_ext = true;
142 }
143
144 /** Initialize the members */
145 void init() {
146 m_data_ptr = nullptr;
147 m_data_len = 0;
148 m_is_ext = false;
149 m_row_id = UINT64_MAX;
150 }
151
152 /** Print this object into the given output stream.
153 @param[in] out output stream into which this object will be printed.
154 @return given output stream */
155 std::ostream &print(std::ostream &out) const;
156
157 std::string to_string() const;
158
159 private:
160 /** If true, the column data is stored externally. */
161 bool m_is_ext{false};
162};
163
164inline std::string Column_text::to_string() const {
166 sout << "[Column_text: len=" << m_data_len;
167 sout << ", val=";
168
169 if (m_data_ptr == nullptr) {
170 sout << "nullptr";
171 } else {
172 for (size_t i = 0; i < m_data_len; ++i) {
173 const char c = m_data_ptr[i];
174 if (isalnum(c)) {
175 sout << c;
176 } else {
177 sout << ".";
178 }
179 }
180 sout << "[hex=";
181 for (size_t i = 0; i < m_data_len; ++i) {
182 sout << std::setfill('0') << std::setw(2) << std::hex
183 << (int)*(&m_data_ptr[i]);
184 }
185 }
186 sout << "]";
187 return sout.str();
188}
189
190inline std::ostream &Column_text::print(std::ostream &out) const {
191 out << "[Column_text: this=" << static_cast<const void *>(this)
192 << ", m_data_ptr=" << static_cast<const void *>(m_data_ptr)
193 << ", m_data_len=" << m_data_len << ", m_is_ext=" << m_is_ext << "]";
194 return out;
195}
196
197/** Overloading the global output operator to print objects of type
198Column_text.
199@param[in] out output stream
200@param[in] obj object to be printed
201@return given output stream. */
202inline std::ostream &operator<<(std::ostream &out, const Column_text &obj) {
203 return obj.print(out);
204}
205
206struct Row_meta;
207
209 /** Column Data Type */
210 int16_t m_type{};
211
212 /** Column data length. */
213 uint16_t m_data_len{};
214
215 /** If column is NULL. */
216 bool m_is_null{false};
217
218 char *get_data() const { return m_is_null ? nullptr : m_data_ptr; }
219
220 void set_data(char *ptr) { m_data_ptr = ptr; }
221
222 /** Save the beginning of the row pointer in this object. This should be
223 called only when the column is null.
224 @param[in] row_begin pointer to beginning of row.*/
225 void row(char *row_begin) {
226 assert(m_is_null);
227 m_data_len = 0;
228 m_data_ptr = row_begin;
229 }
230
231 /** Get the pointer to the beginning of row. This is valid only if the
232 column is null. This should be called on the first column of the row. There
233 is no need to call this on other columns.
234 @param[in] row_meta meta data information about the row
235 @param[in] col_index Index of the first column which is 0.
236 @return pointer to row beginning. */
237 char *get_row_begin(const Row_meta &row_meta,
238 size_t col_index [[maybe_unused]]) const;
239
240 /** Column data in integer format. Used only for specific datatype. */
241 uint64_t m_int_data;
242
243 void init() {
244 m_type = 0;
245 m_data_len = 0;
246 m_is_null = false;
247 m_data_ptr = nullptr;
248 m_int_data = 0;
249 }
250
251 std::string to_string() const;
252
253 private:
254 /** Column data or row begin. There is a need to fetch the beginning of
255 the row from the vector of Column_mysql. But in the case of secondary
256 indexes, all the keys could be null and it becomes impossible to obtain
257 the pointer to beginning of the row. To solve this problem, I am re-using
258 this pointer to hold the row begin when the column is null. So it becomes
259 important to make use of m_is_null to check if the column is null. It is NOT
260 correct to check this pointer against nullptr to confirm if column is null.*/
261 char *m_data_ptr{nullptr};
262};
263
264inline std::string Column_mysql::to_string() const {
266 sout << "[Column_mysql: type=" << m_type << ", len=" << m_data_len
267 << ", m_int_data=" << m_int_data;
268 sout << ", val=";
269
270 switch (m_type) {
271 case MYSQL_TYPE_LONG: {
272 sout << m_int_data;
273 } break;
274 default: {
275 for (size_t i = 0; i < m_data_len; ++i) {
276 const char c = m_data_ptr[i];
277 if (isalnum(c)) {
278 sout << c;
279 } else {
280 sout << ".";
281 }
282 }
283
284 } break;
285 }
286 if (m_type != MYSQL_TYPE_LONG) {
287 sout << "[hex=";
288 for (size_t i = 0; i < m_data_len; ++i) {
289 sout << std::setfill('0') << std::setw(2) << std::hex
290 << (int)*(&m_data_ptr[i]);
291 }
292 sout << "]";
293 }
294 return sout.str();
295}
296
297/** Implements the row and column memory management for parse and load
298operations. We try to pre-allocate the memory contiguously as much as we can
299to maximize the performance.
300
301@tparam Column_type Column_text when used in the CSV context, Column_mysql
302when used in the InnoDB context.
303*/
304template <typename Column_type>
306 public:
307 /** Create a new row bunch.
308 @param[in] n_cols number of columns */
309 explicit Row_bunch(size_t n_cols) : m_num_columns(n_cols) {}
310
311 /** @return return number of rows in the bunch. */
312 size_t get_num_rows() const { return m_num_rows; }
313
314 /** @return return number of columns in each row. */
315 size_t get_num_cols() const { return m_num_columns; }
316
317 /** Process all columns, invoking callback for each.
318 @param[in] row_index index of the row
319 @param[in] cbk callback function
320 @return true if successful */
321 template <typename F>
322 bool process_columns(size_t row_index, F &&cbk) {
323 assert(row_index < m_num_rows);
324
325 auto row_offset = row_index * m_num_columns;
326 return process_columns_by_offset(row_offset, std::move(cbk));
327 }
328
329 template <typename F>
330 bool process_columns_by_offset(size_t row_offset, F &&cbk) {
331 assert(row_offset + m_num_columns <= m_columns.size());
332
333 for (size_t index = 0; index < m_num_columns; ++index) {
334 bool last_col = (index == m_num_columns - 1);
335 if (!cbk(m_columns[row_offset + index], last_col)) {
336 return false;
337 }
338 }
339 return true;
340 }
341
342 void reset() {
343 for (auto &col : m_columns) {
344 col.init();
345 }
346 }
347
348 /** Get current row offset to access columns.
349 @param[in] row_index row index
350 @return row offset in column vector. */
351 size_t get_row_offset(size_t row_index) const {
352 assert(row_index < m_num_rows);
353 return row_index * m_num_columns;
354 }
355
356 /** Get next row offset from current row offset.
357 @param[in,out] offset row offset
358 @return true if there is a next row. */
359 size_t get_next_row_offset(size_t &offset) const {
360 offset += m_num_columns;
361 return (offset < m_columns.size());
362 }
363
364 /** Get column using row offset and column index.
365 @param[in] row_offset row offset in column vector
366 @param[in] col_index index of the column within row
367 @return column data */
368 Column_type &get_column(size_t row_offset, size_t col_index) {
369 assert(col_index < m_num_columns);
370 assert(row_offset + col_index < m_columns.size());
371 return m_columns[row_offset + col_index];
372 }
373
374 /** Get column using row index and column index.
375 @param[in] row_index index of the row in the bunch
376 @param[in] col_index index of the column within row
377 @return column data */
378 Column_type &get_col(size_t row_index, size_t col_index) {
379 return get_column(get_row_offset(row_index), col_index);
380 }
381
382 /** Get column using the column offset.
383 @param[in] col_offset column offset
384 @return column data */
385 Column_type &get_col(size_t col_offset) { return m_columns[col_offset]; }
386
387 /** Get constant column for reading using row offset and column index.
388 @param[in] row_offset row offset in column vector
389 @param[in] col_index index of the column within row
390 @return column data */
391 const Column_type &read_column(size_t row_offset, size_t col_index) const {
392 assert(col_index < m_num_columns);
393 assert(row_offset + col_index < m_columns.size());
394 return m_columns[row_offset + col_index];
395 }
396
397 /** Set the number of rows. Adjust number of rows based on maximum column
398 storage limit.
399 @param[in] n_rows number of rows
400 @return true if successful, false if too many rows or columns. */
401 bool set_num_rows(size_t n_rows) {
402 /* Avoid any overflow during multiplication. */
403 if (n_rows > std::numeric_limits<uint32_t>::max() ||
405 return false;
406 }
407 auto total_cols = (uint64_t)n_rows * m_num_columns;
408
409 if (total_cols > S_MAX_TOTAL_COLS) {
410 return false;
411 }
412
413 m_num_rows = n_rows;
414
415 /* Extend columns if needed. */
416 if (m_columns.size() < total_cols) {
417 m_columns.resize(total_cols);
418 }
419 return true;
420 }
421
422 /** Limit allocation up to 600M columns. This number is rounded up from an
423 * estimate of the number of columns with the max chunk size (1024M). In the
424 * worst case we can have 2 bytes per column so a chunk can contain around
425 * 512M columns, and because of rows that spill over chunk boundaries we
426 * assume we can append a full additional row (which should have at most
427 * 4096 columns). Rounded up to 600M. */
428 const static size_t S_MAX_TOTAL_COLS = 600 * 1024 * 1024;
429
430 private:
431 /** All the columns. */
432 std::vector<Column_type> m_columns;
433
434 /** Number of rows. */
435 size_t m_num_rows{};
436
437 /** Number of columns in each row. */
439};
440
443
444/** Column metadata information. */
446 /** Data comparison method. */
447 enum class Compare {
448 /* Integer comparison */
450 /* Unsigned Integer comparison */
452 /* Binary comparison (memcmp) */
453 BINARY,
454 /* Need to callback to use appropriate comparison function in server. */
455 MYSQL
456 };
457
458 std::string get_compare_string() const {
459 switch (m_compare) {
461 return "INTEGER_SIGNED";
463 return "INTEGER_UNSIGNED";
464 case Compare::BINARY:
465 return "BINARY";
466 case Compare::MYSQL:
467 return "MYSQL";
468 }
469 assert(0);
470 return "INVALID";
471 }
472
473 /** @return true if integer type. */
474 bool is_integer() const {
477 }
478
479 /** Based on the column data type check if it can be stored externally.
480 @return true if the column data can be stored externally
481 @return false if the column data cannot be stored externally */
482 bool can_be_stored_externally() const;
483
484 /** true if this column is part of secondary index. */
485 bool m_is_part_of_sk{false};
486
487 /** Field type. (@ref enum_field_types) */
489
490 /** If column could be NULL. */
491 bool m_is_nullable{false};
492
493 /** true if column belongs to primary index (key or non-key) */
494 bool m_is_pk{false};
495
496 /** true if column is a key for primary or secondary index. */
497 bool m_is_key{false};
498
499 /** If the key is descending. */
500 bool m_is_desc_key{false};
501
502 /** If the key is prefix of the column. */
503 bool m_is_prefix_key{false};
504
505 /** If it is fixed length type. */
506 bool m_is_fixed_len{false};
507
508 /** If it is integer type. */
510
511 /** If it is unsigned integer type. */
512 bool m_is_unsigned{false};
513
514 /** Check the row header to find out if it is fixed length. For
515 character data type the row header indicates fixed length. */
517
518 /** If character column length can be kept in one byte. */
520
521 /** The length of column data if fixed. */
522 uint16_t m_fixed_len;
523
524 /** Maximum length of data in bytes. */
525 uint16_t m_max_len;
526
527 /** Index of column in row. */
528 uint16_t m_index;
529
530 /** Position of column in table. Refer to Field::field_index() */
532
533 /** Byte index in NULL bitmap. */
534 uint16_t m_null_byte;
535
536 /** BIT number in NULL bitmap. */
537 uint16_t m_null_bit;
538
539 /** Character set for char & varchar columns. */
540 const void *m_charset;
541
542 /** Field name */
543 std::string m_field_name;
544
545 /** Get a string representation of Column_meta object. Useful only for
546 debugging purposes.
547 @see Column_meta
548 @return string representation of this object. */
549 std::string to_string() const;
550
551 /** Print this object into the given output stream.
552 @param[in] out output stream into which object will be printed
553 @return given output stream. */
554 std::ostream &print(std::ostream &out) const;
555
556 /** Get the data type of the column as a string.
557 @return data type of the column as a string. */
558 std::string get_type_string() const;
559};
560
561inline std::string Column_meta::get_type_string() const {
562 switch (m_type) {
564 return "decimal";
565 case MYSQL_TYPE_TINY:
566 return "tiny";
567 case MYSQL_TYPE_SHORT:
568 return "short";
569 case MYSQL_TYPE_LONG:
570 return "long";
571 case MYSQL_TYPE_FLOAT:
572 return "float";
574 return "double";
575 case MYSQL_TYPE_NULL:
576 return "null";
578 return "timestamp";
580 return "longlong";
581 case MYSQL_TYPE_INT24:
582 return "int";
583 case MYSQL_TYPE_DATE:
584 return "date";
585 case MYSQL_TYPE_TIME:
586 return "time";
588 return "datetime";
589 case MYSQL_TYPE_YEAR:
590 return "year";
592 return "date";
594 return "varchar";
595 case MYSQL_TYPE_BIT:
596 return "bit";
598 return "timestamp";
600 return "datetime";
601 case MYSQL_TYPE_TIME2:
602 return "time";
604 return "typed_array";
606 return "vector";
608 return "invalid";
609 case MYSQL_TYPE_BOOL:
610 return "bool";
611 case MYSQL_TYPE_JSON:
612 return "json";
614 return "decimal";
615 case MYSQL_TYPE_ENUM:
616 return "enum";
617 case MYSQL_TYPE_SET:
618 return "set";
620 return "tiny_blob";
622 return "medium_blob";
624 return "long_blob";
625 case MYSQL_TYPE_BLOB:
626 return "blob";
628 return "var_string";
630 return "string";
632 return "geometry";
633 }
634 return "invalid";
635}
636
638 switch (m_type) {
639 case MYSQL_TYPE_JSON:
644 case MYSQL_TYPE_BLOB:
647 return true;
648 }
649 default:
650 break;
651 }
652 return false;
653}
654
655inline std::string Column_meta::to_string() const {
657 out << "[Column_meta: m_type=" << get_type_string()
658 << ", m_field_name=" << m_field_name << ", m_index=" << m_index
659 << ", m_field_index=" << m_field_index
660 << ", m_is_single_byte_len=" << m_is_single_byte_len
661 << ", m_is_fixed_len=" << m_is_fixed_len
662 << ", m_fixed_len=" << m_fixed_len << ", m_null_byte=" << m_null_byte
663 << ", m_null_bit=" << m_null_bit << ", m_compare=" << get_compare_string()
664 << ", m_is_desc_key=" << m_is_desc_key << ", m_is_key=" << m_is_key
665 << ", m_is_prefix_key=" << m_is_prefix_key << "]";
666 return out.str();
667}
668
669inline std::ostream &Column_meta::print(std::ostream &out) const {
670 out << to_string();
671 return out;
672}
673
674/** Overloading the global output operator to print objects of type
675Column_meta.
676@param[in] out output stream
677@param[in] obj object to be printed
678@return given output stream. */
679inline std::ostream &operator<<(std::ostream &out, const Column_meta &obj) {
680 return obj.print(out);
681}
682
683/** Table metadata. */
685 /** Number of keys/indexes the table has. */
686 size_t m_n_keys;
687
688 /** Key number of the primary key. */
690
691 /** True if generated DB_ROW_ID is the pk. */
692 bool dbrowid_is_pk{false};
693
696
697 /** Table being bulk loaded. */
698 std::string m_table_name;
699};
700
701/** Row metadata */
702struct Row_meta {
703 /** Key type for fast comparison. */
704 enum class Key_type {
705 /* All keys are signed integers and ascending. */
707 /* All keys are integer. */
708 INT,
709 /* Keys are of any supported type. */
710 ANY
711 };
712 /** All columns in a row are arranged with key columns first. */
713 std::vector<Column_meta> m_columns;
714
715 /** All columns in a row arranged as per col_index. */
716 std::vector<const Column_meta *> m_columns_text_order;
717
718 /** Get a string representation of this Row_meta object.
719 @see Row_meta
720 @return string representation of this object. */
721 std::string to_string() const;
722
723 /** Get the metadata of the given column.
724 @param[in] col_index position of the column in the index.
725 @return metadata of the requested column. */
726 const Column_meta &get_column_meta_index_order(size_t col_index) const {
727 assert(col_index < m_columns.size());
728 return m_columns[col_index];
729 }
730
731 /** Get the meta data of the column.
732 @param[in] col_index the index of the column as it appears in CSV file.
733 @return a reference to the column meta data.*/
734 const Column_meta &get_column_meta(size_t col_index) const {
735 assert(col_index < m_columns_text_order.size());
736 assert(col_index == m_columns_text_order[col_index]->m_index);
737 return *m_columns_text_order[col_index];
738 }
739
740 /** Total bitmap header length for the row. */
741 size_t m_bitmap_length = 0;
742
743 /** Total header length. */
744 size_t m_header_length = 0;
745
746 /** Length of the first key column. Helps to get the row pointer from first
747 key data pointer. */
748 size_t m_first_key_len = 0;
749
750 /** Key length in bytes for non-integer keys. This is required to estimate
751 the space required to save keys. */
752 size_t m_key_length = 0;
753
754 /** Number of columns used in primary key. */
755 uint32_t m_keys = 0;
756
757 /** Number of columns not used in primary Key. */
758 uint32_t m_non_keys = 0;
759
760 /** Key type for comparison. */
762
763 /** Total number of columns. A key could be on a column prefix.
764 m_columns <= m_keys + m_non_keys */
765 uint32_t m_num_columns = 0;
766
767 /** Approximate row length. */
769
770 /** Number of columns that can be stored externally. */
771 size_t m_n_blob_cols{0};
772
773 /** Name of the key */
774 std::string m_name;
775
776 /** true if primary key, false if secondary key. */
777 bool is_pk{false};
778
779 /** true if DB_ROW_ID is the pk, false otherwise. */
780 bool dbrowid_is_pk{false};
781};
782
783inline std::ostream &operator<<(std::ostream &os,
785 switch (key_type) {
787 os << "ANY";
788 break;
790 os << "INT_SIGNED_ASC";
791 break;
793 os << "INT";
794 break;
795 }
796 return os;
797}
798
799inline std::string Row_meta::to_string() const {
801 out << "[Row_meta: m_name=" << m_name << ", m_num_columns=" << m_num_columns
802 << ", m_keys=" << m_keys << ", m_non_keys=" << m_non_keys
803 << ", m_key_length=" << m_key_length << ", m_key_type=" << m_key_type
804 << ", m_approx_row_len=" << m_approx_row_len;
805 for (auto &col_meta : m_columns) {
806 out << col_meta.to_string() << ", ";
807 }
808 out << "]";
809 return out.str();
810}
811
812inline char *Column_mysql::get_row_begin(const Row_meta &row_meta,
813 size_t col_index
814 [[maybe_unused]]) const {
815 assert(m_is_null || col_index == 0);
816 return m_is_null ? m_data_ptr
817 : (m_data_ptr - row_meta.m_first_key_len -
818 row_meta.m_header_length);
819}
820
821namespace Bulk_load {
822
825 public:
826 void KeyTooBig() const override;
827 void ValueTooBig() const override;
828 void TooDeep() const override;
829 void InvalidJson() const override;
830 void InternalError(const char *message) const override;
831 bool CheckStack() const override;
832
833 const char *c_str() const { return m_error.c_str(); }
834
835 std::string get_error() const { return m_error; }
836
837 private:
838 mutable std::string m_error{};
839};
840
842 m_error = "Key is too big";
843}
844
846 m_error = "Value is too big";
847}
848
850 m_error = "JSON document has more nesting levels than supported";
851}
853 m_error = "Invalid JSON value is encountered";
854}
856 const char *message [[maybe_unused]]) const {
857 m_error = message;
858 m_error += " (Internal Error)";
859}
860
862 return false;
863}
864
865/** Callbacks for collecting time statistics */
867 /* Operation begin. */
868 std::function<void()> m_fn_begin;
869 /* Operation end. */
870 std::function<void()> m_fn_end;
871};
872
874 std::pair<std::optional<Rows_mysql>, std::optional<Rows_mysql>>;
875
877 std::string schema;
878 std::string table;
880};
881
882/** Contains the data needed for the ROW_ID generation for tables without
883explicit primary key */
885 /** Get an estimate of the number of rows to be handled by each thread.
886 This will give the number of row ids to be generated by each thread.
887 @return number of rows to be handled by each thread. */
888 size_t get_rows_per_thread() const {
889 size_t estimated_total_rows = m_total_size / m_avg_row_len.load();
890 size_t min_total_rows = 1000;
891
892 if (estimated_total_rows < min_total_rows) {
893 estimated_total_rows = min_total_rows;
894 }
895
896 return (estimated_total_rows + m_n_loaders) / m_n_loaders;
897 }
898
899 /** Get the next available range for generating DB_ROW_ID. The range includes
900 the begin value but excludes the end value.
901 @return the range for row ids for exclusive use by the calling thread. */
902 inline std::pair<uint64_t, uint64_t> get_next_rowid_range() const {
903 std::unique_lock<std::mutex> lock(m_rowid_mutex);
904
905 const uint64_t range_begin = m_next_rowid_range;
907
908 return std::make_pair(range_begin, m_next_rowid_range);
909 }
910
911 void set_begin_rowid_value(size_t row_id) { m_next_rowid_range = row_id; }
912 /* Total data size of CSV files (in bytes) */
914 /* Average row length, updated by the CSV parsing threads for each batch. */
915 std::atomic<size_t> m_avg_row_len;
916 /* Number of loaders aka concurrency in phase 1. */
918
919 private:
920 /** Protects the member m_next_rowid_range */
921 mutable std::mutex m_rowid_mutex;
922
923 /* Beginning of the next DB_ROW_ID range to be handed out. */
924 mutable size_t m_next_rowid_range{0};
925};
926
927} // namespace Bulk_load
928
929/** Bulk Data conversion. */
930BEGIN_SERVICE_DEFINITION(bulk_data_convert)
931/** Convert row from text format to MySQL column format. Convert as many
932rows as possible consuming the data buffer starting from next_index. On
933output next_index is the next row index that is not yet consumed. If it
934matches the size of input text_rows, then all rows are consumed.
935@param[in,out] thd session THD
936@param[in] table MySQL TABLE
937@param[in] text_rows rows with column in text
938@param[in,out] next_index next_index in text_rows to be processed
939@param[in,out] buffer data buffer for keeping sql row data
940@param[in,out] buffer_length length of the data buffer
941@param[in] charset input row data character set
942@param[in] metadata row metadata
943@param[out] sql_rows rows with column in MySQL column format
944@return error code. */
946 (THD * thd, const TABLE *table, const Rows_text &text_rows,
947 size_t &next_index, char *buffer, size_t &buffer_length,
948 const CHARSET_INFO *charset, const Row_meta &metadata,
949 Rows_mysql &sql_rows,
951
952/** Convert row to MySQL column format from raw form
953@param[in,out] buffer input raw data buffer
954@param[in] buffer_length buffer length
955@param[in] metadata row metadata
956@param[in] start_index start row index in row bunch
957@param[out] consumed_length length of buffer consumed
958@param[in,out] sql_rows row bunch to fill data
959@return error code. */
961 (char *buffer, size_t buffer_length, const Row_meta &metadata,
962 size_t start_index, size_t &consumed_length,
963 Rows_mysql &sql_rows));
964
965/** Convert row to MySQL column format using the key
966@param[in] metadata row metadata
967@param[in] sql_keys Key bunch
968@param[in] key_offset offset for the key
969@param[in,out] sql_rows row bunch to fill data
970@param[in] sql_index index of the row to be filled
971@return error code. */
973 (const Row_meta &metadata, const Rows_mysql &sql_keys,
974 size_t key_offset, Rows_mysql &sql_rows, size_t sql_index));
975
976/** Check if session is interrupted.
977@param[in,out] thd session THD
978@return true if connection or statement is killed. */
980
981/** Compare two key columns
982@param[in] key1 first key
983@param[in] key2 second key
984@param[in] col_meta column meta information
985@return positive, 0, negative, if key_1 is greater, equal, less than key_2 */
987 (const Column_mysql &key1, const Column_mysql &key2,
988 const Column_meta &col_meta));
989
990/** Get row metadata information for all the indexes.
991@param[in,out] thd session THD
992@param[in] table MySQL TABLE
993@param[in] have_key include Primary Key metadata
994@param[out] metadata Metadata for each of the indexes.
995@return true if successful. */
997 (THD * thd, const TABLE *table, bool have_key,
998 std::vector<Row_meta> &metadata));
999
1000/** Get table metadata information for the table being bulk loaded.
1001@param[in,out] thd session THD
1002@param[in] table MySQL TABLE
1003@param[out] metadata Metadata of the table.
1004@return true if successful. */
1006 (THD * thd, const TABLE *table, Table_meta &metadata));
1007
1008END_SERVICE_DEFINITION(bulk_data_convert)
1009
1010/** Column metadata information. */
1011/* Bulk data load to SE. */
1013/** Begin Loading bulk data to SE.
1014@param[in,out] thd session THD
1015@param[in] table MySQL TABLE
1016@param[in] keynr key number, identifying the index being loaded.
1017@param[in] data_size total data size to load
1018@param[in] memory SE memory to be used
1019@param[in] num_threads Number of concurrent threads
1020@return SE bulk load context or nullptr in case of an error. */
1021DECLARE_METHOD(void *, begin,
1022 (THD * thd, const TABLE *table, size_t keynr, size_t data_size,
1023 size_t memory, size_t num_threads));
1024
1025/** Load a set of rows to SE table by one thread.
1026@param[in,out] thd session THD
1027@param[in,out] ctx SE load context returned by begin()
1028@param[in] table MySQL TABLE
1029@param[in] sql_rows row data to load
1030@param[in] thread current thread number
1031@param[in] wait_cbks wait stat callbacks
1032@return true if successful. */
1033DECLARE_METHOD(bool, load,
1034 (THD * thd, void *ctx, const TABLE *table,
1035 const Rows_mysql &sql_rows, size_t thread,
1036 Bulk_load::Stat_callbacks &wait_cbks));
1037
1038/** Create a blob context object to insert a blob.
1039@param[in,out] thd session THD
1040@param[in,out] load_ctx SE load context returned by begin()
1041@param[in] table MySQL TABLE
1042@param[out] blob_ctx a blob context object to insert a blob.
1043@param[out] blobref buffer to hold blob reference
1044@param[in] thread current thread number
1045@return true if successful. */
1047 (THD * thd, void *load_ctx, const TABLE *table,
1048 Blob_context &blob_ctx, unsigned char *blobref, size_t thread));
1049
1050/** Write data into a blob
1051@param[in,out] thd session THD
1052@param[in,out] load_ctx SE load context returned by begin()
1053@param[in] table MySQL TABLE
1054@param[in] blob_ctx a blob context object to insert a blob.
1055@param[out] blobref buffer to hold blob reference
1056@param[in] thread current thread number
1057@param[in] data blob data to be written
1058@param[in] data_len length of blob data to be written (in bytes);
1059@return true if successful. */
1061 (THD * thd, void *load_ctx, const TABLE *table,
1062 Blob_context blob_ctx, unsigned char *blobref, size_t thread,
1063 const unsigned char *data, size_t data_len));
1064
1065/** Close the blob
1066@param[in,out] thd session THD
1067@param[in,out] load_ctx SE load context returned by begin()
1068@param[in] table MySQL TABLE
1069@param[in] blob_ctx a blob context object to insert a blob.
1070@param[out] blobref buffer to hold blob reference
1071@param[in] thread current thread number
1072@return true if successful. */
1074 (THD * thd, void *load_ctx, const TABLE *table,
1075 Blob_context blob_ctx, unsigned char *blobref, size_t thread));
1076
1077/** End Loading bulk data to SE.
1078
1079Called at the end of bulk load execution, even if begin or load calls failed.
1080
1081@param[in,out] thd session THD
1082@param[in,out] ctx SE load context
1083@param[in] table MySQL TABLE
1084@param[in] error true, if exiting after error
1085@return true if successful. */
1086DECLARE_METHOD(bool, end,
1087 (THD * thd, void *ctx, const TABLE *table, bool error));
1088
1089/** Check if a table is supported by the bulk load implementation.
1090@param[in,out] thd session THD
1091@param[in] table MySQL TABLE
1092@return true if table is supported. */
1094
1095/** Get available buffer pool memory for bulk load operations.
1096@param[in,out] thd session THD
1097@param[in] table MySQL TABLE
1098@return buffer pool memory available for bulk load. */
1100
1101/** Copies data from existing table into the duplicated table during incremental
1102load. This is called after the bulk load component detects we reached the end of
1103the CSV input for the respective sub-loader and it signals that the loader
1104should now iterate through the remainder of the existing data in the original
1105table and migrate it.
1106@param[in,out] ctx SE load context
1107@param[in] table MySQL TABLE
1108@param[in] thread loader thread index
1109@param[in,out] wait_cbks wait stat callbacks
1110@return true if successful, false otherwise. */
1112 (void *ctx, const TABLE *table, size_t thread,
1113 Bulk_load::Stat_callbacks &wait_cbks));
1114
1115/** Sets the source table data (table name and key range boundaries) for all
1116loaders.
1117@param[in,out] ctx SE load context
1118@param[in] table MySQL TABLE
1119@param[in] source_table_data vector containing the source table data
1120@return true if successful, false otherwise. */
1123 (void *ctx, const TABLE *table,
1124 const std::vector<Bulk_load::Source_table_data> &source_table_data));
1125
Kerberos Client Authentication nullptr
Definition: auth_kerberos_client_plugin.cc:247
constexpr size_t BLOB_REF_SIZE
The blob reference size.
Definition: bulk_data_service.h:56
void * Blob_context
Definition: bulk_data_service.h:53
std::ostream & operator<<(std::ostream &out, const Bulk_load_error_location_details &obj)
Overloading the global output operator to print objects of type Bulk_load_error_location_details.
Definition: bulk_data_service.h:84
Definition: bulk_data_service.h:824
const char * c_str() const
Definition: bulk_data_service.h:833
void KeyTooBig() const override
Called when a JSON object contains a member with a name that is longer than supported by the JSON bin...
Definition: bulk_data_service.h:841
std::string get_error() const
Definition: bulk_data_service.h:835
std::string m_error
Definition: bulk_data_service.h:838
void InternalError(const char *message) const override
Called when an internal error occurs.
Definition: bulk_data_service.h:855
void ValueTooBig() const override
Called when a JSON document is too big to be stored in the JSON binary format.
Definition: bulk_data_service.h:845
void TooDeep() const override
Called when a JSON document has more nesting levels than supported.
Definition: bulk_data_service.h:849
void InvalidJson() const override
Called when an invalid JSON value is encountered.
Definition: bulk_data_service.h:852
bool CheckStack() const override
Check if the stack is about to be exhausted, and report the error.
Definition: bulk_data_service.h:861
Error handler for the functions that serialize a JSON value in the JSON binary storage format.
Definition: json_error_handler.h:49
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:305
bool set_num_rows(size_t n_rows)
Set the number of rows.
Definition: bulk_data_service.h:401
std::vector< Column_type > m_columns
All the columns.
Definition: bulk_data_service.h:432
size_t get_next_row_offset(size_t &offset) const
Get next row offset from current row offset.
Definition: bulk_data_service.h:359
Column_type & get_col(size_t col_offset)
Get column using the column offset.
Definition: bulk_data_service.h:385
bool process_columns(size_t row_index, F &&cbk)
Process all columns, invoking callback for each.
Definition: bulk_data_service.h:322
bool process_columns_by_offset(size_t row_offset, F &&cbk)
Definition: bulk_data_service.h:330
size_t get_num_cols() const
Definition: bulk_data_service.h:315
void reset()
Definition: bulk_data_service.h:342
size_t m_num_rows
Number of rows.
Definition: bulk_data_service.h:435
size_t get_row_offset(size_t row_index) const
Get current row offset to access columns.
Definition: bulk_data_service.h:351
size_t get_num_rows() const
Definition: bulk_data_service.h:312
const Column_type & read_column(size_t row_offset, size_t col_index) const
Get constant column for reading using row offset and column index.
Definition: bulk_data_service.h:391
Column_type & get_col(size_t row_index, size_t col_index)
Get column using row index and column index.
Definition: bulk_data_service.h:378
Row_bunch(size_t n_cols)
Create a new row bunch.
Definition: bulk_data_service.h:309
static const size_t S_MAX_TOTAL_COLS
Limit allocation up to 600M columns.
Definition: bulk_data_service.h:428
size_t m_num_columns
Number of columns in each row.
Definition: bulk_data_service.h:438
Column_type & get_column(size_t row_offset, size_t col_index)
Get column using row offset and column index.
Definition: bulk_data_service.h:368
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
This file contains the field type.
enum_field_types
Column types for MySQL Note: Keep include/mysql/components/services/bits/stored_program_bits....
Definition: field_types.h:55
@ MYSQL_TYPE_BOOL
Currently just a placeholder.
Definition: field_types.h:79
@ MYSQL_TYPE_TIME2
Internal to MySQL.
Definition: field_types.h:75
@ MYSQL_TYPE_VARCHAR
Definition: field_types.h:71
@ MYSQL_TYPE_LONGLONG
Definition: field_types.h:64
@ MYSQL_TYPE_LONG_BLOB
Definition: field_types.h:86
@ MYSQL_TYPE_VAR_STRING
Definition: field_types.h:88
@ MYSQL_TYPE_BLOB
Definition: field_types.h:87
@ MYSQL_TYPE_TINY
Definition: field_types.h:57
@ MYSQL_TYPE_TIME
Definition: field_types.h:67
@ MYSQL_TYPE_SET
Definition: field_types.h:83
@ MYSQL_TYPE_NEWDATE
Internal to MySQL.
Definition: field_types.h:70
@ MYSQL_TYPE_VECTOR
Definition: field_types.h:77
@ MYSQL_TYPE_JSON
Definition: field_types.h:80
@ MYSQL_TYPE_STRING
Definition: field_types.h:89
@ MYSQL_TYPE_NULL
Definition: field_types.h:62
@ MYSQL_TYPE_ENUM
Definition: field_types.h:82
@ MYSQL_TYPE_TINY_BLOB
Definition: field_types.h:84
@ MYSQL_TYPE_LONG
Definition: field_types.h:59
@ MYSQL_TYPE_BIT
Definition: field_types.h:72
@ MYSQL_TYPE_INVALID
Definition: field_types.h:78
@ MYSQL_TYPE_GEOMETRY
Definition: field_types.h:90
@ MYSQL_TYPE_NEWDECIMAL
Definition: field_types.h:81
@ MYSQL_TYPE_DECIMAL
Definition: field_types.h:56
@ MYSQL_TYPE_TYPED_ARRAY
Used for replication only.
Definition: field_types.h:76
@ MYSQL_TYPE_DOUBLE
Definition: field_types.h:61
@ MYSQL_TYPE_MEDIUM_BLOB
Definition: field_types.h:85
@ MYSQL_TYPE_DATETIME2
Internal to MySQL.
Definition: field_types.h:74
@ MYSQL_TYPE_SHORT
Definition: field_types.h:58
@ MYSQL_TYPE_DATE
Definition: field_types.h:66
@ MYSQL_TYPE_FLOAT
Definition: field_types.h:60
@ MYSQL_TYPE_TIMESTAMP
Definition: field_types.h:63
@ MYSQL_TYPE_INT24
Definition: field_types.h:65
@ MYSQL_TYPE_DATETIME
Definition: field_types.h:68
@ MYSQL_TYPE_TIMESTAMP2
Definition: field_types.h:73
@ MYSQL_TYPE_YEAR
Definition: field_types.h:69
static int compare_keys(PFS_table_share *pfs, const TABLE_SHARE *share)
Definition: pfs_instr_class.cc:2435
static uint16 key1[1001]
Definition: hp_test2.cc:50
#define F
Definition: jit_executor_value.cc:374
#define UINT16_MAX
Definition: lexyy.cc:83
void error(const char *format,...)
int mysql_format_from_raw(char *buffer, size_t buffer_length, const Row_meta &metadata, size_t start_index, size_t &consumed_length, Rows_mysql &sql_rows) noexcept
Definition: bulk_data_service.cc:1858
bool get_table_metadata(THD *thd, const TABLE *table, Table_meta &table_meta) noexcept
Definition: bulk_data_service.cc:2530
int mysql_format(THD *thd, const TABLE *table, const Rows_text &text_rows, size_t &next_index, char *buffer, size_t &buffer_length, const CHARSET_INFO *charset, const Row_meta &metadata, Rows_mysql &sql_rows, Bulk_load_error_location_details &error_details) noexcept
Definition: bulk_data_service.cc:1889
bool get_row_metadata_all(THD *thd, const TABLE *table, bool have_key, std::vector< Row_meta > &row_meta_all) noexcept
Definition: bulk_data_service.cc:2551
int mysql_format_using_key(const Row_meta &metadata, const Rows_mysql &sql_keys, size_t key_offset, Rows_mysql &sql_rows, size_t sql_index) noexcept
Definition: bulk_data_service.cc:1799
bool is_killed(THD *thd) noexcept
Definition: bulk_data_service.cc:1941
bool copy_existing_data(void *ctx, const TABLE *duplicate_table, size_t thread, Bulk_load::Stat_callbacks &wait_cbks) noexcept
Definition: bulk_data_service.cc:2731
bool open_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context &blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2617
size_t get_se_memory_size(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2725
bool set_source_table_data(void *ctx, const TABLE *duplicate_table, const std::vector< Bulk_load::Source_table_data > &source_table_data) noexcept
Definition: bulk_data_service.cc:2740
bool write_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread, const unsigned char *data, size_t data_len) noexcept
Definition: bulk_data_service.cc:2627
bool close_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2635
bool is_table_supported(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2745
Definition: bulk_data_service.h:821
std::pair< std::optional< Rows_mysql >, std::optional< Rows_mysql > > Read_range
Definition: bulk_data_service.h:874
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
const std::string charset("charset")
bool load(THD *, const dd::String_type &fname, dd::String_type *buf)
Read an sdi file from disk and store in a buffer.
Definition: sdi_file.cc:308
std::string hex(const Container &c)
Definition: hex.h:61
bool index(const std::string &value, const String &search_for, uint32_t *idx)
Definition: contains.h:76
int key_type
Definition: method.h:38
Definition: aligned_atomic.h:44
ValueType max(X &&first)
Definition: gtid.h:103
const char * begin(const char *const c)
Definition: base64.h:44
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:418
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
Define std::hash<Gtid>.
Definition: gtid.h:355
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2876
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2880
static std::mutex lock
Definition: net_ns.cc:56
#define DECLARE_METHOD(retval, name, args)
Declares a method as a part of the Service definition.
Definition: service.h:103
#define END_SERVICE_DEFINITION(name)
A macro to end the last Service definition started with the BEGIN_SERVICE_DEFINITION macro.
Definition: service.h:91
#define BEGIN_SERVICE_DEFINITION(name)
Declares a new Service.
Definition: service.h:86
Contains the data needed for the ROW_ID generation for tables without explicit primary key.
Definition: bulk_data_service.h:884
std::pair< uint64_t, uint64_t > get_next_rowid_range() const
Get the next available range for generating DB_ROW_ID.
Definition: bulk_data_service.h:902
size_t get_rows_per_thread() const
Get an estimate of the number of rows to be handled by each thread.
Definition: bulk_data_service.h:888
std::atomic< size_t > m_avg_row_len
Definition: bulk_data_service.h:915
std::mutex m_rowid_mutex
Protects the member m_next_rowid_range.
Definition: bulk_data_service.h:921
void set_begin_rowid_value(size_t row_id)
Definition: bulk_data_service.h:911
size_t m_n_loaders
Definition: bulk_data_service.h:917
size_t m_total_size
Definition: bulk_data_service.h:913
size_t m_next_rowid_range
Definition: bulk_data_service.h:924
Definition: bulk_data_service.h:876
Read_range range
Definition: bulk_data_service.h:879
std::string table
Definition: bulk_data_service.h:878
std::string schema
Definition: bulk_data_service.h:877
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:866
std::function< void()> m_fn_begin
Definition: bulk_data_service.h:868
std::function< void()> m_fn_end
Definition: bulk_data_service.h:870
Definition: bulk_data_service.h:58
std::string filename
Definition: bulk_data_service.h:59
std::string m_table_name
Definition: bulk_data_service.h:65
size_t m_bytes
Definition: bulk_data_service.h:66
size_t row_number
Definition: bulk_data_service.h:60
std::string column_input_data
Definition: bulk_data_service.h:63
std::string column_name
Definition: bulk_data_service.h:61
size_t m_column_length
Definition: bulk_data_service.h:67
std::string m_error_mesg
Definition: bulk_data_service.h:64
std::ostream & print(std::ostream &out) const
Definition: bulk_data_service.h:72
std::string column_type
Definition: bulk_data_service.h:62
Definition: m_ctype.h:421
Column metadata information.
Definition: bulk_data_service.h:445
bool m_is_prefix_key
If the key is prefix of the column.
Definition: bulk_data_service.h:503
enum_field_types m_type
Field type.
Definition: bulk_data_service.h:488
std::string m_field_name
Field name.
Definition: bulk_data_service.h:543
std::string get_type_string() const
Get the data type of the column as a string.
Definition: bulk_data_service.h:561
uint16_t m_index
Index of column in row.
Definition: bulk_data_service.h:528
bool m_is_single_byte_len
If character column length can be kept in one byte.
Definition: bulk_data_service.h:519
uint16_t m_null_byte
Byte index in NULL bitmap.
Definition: bulk_data_service.h:534
bool m_is_desc_key
If the key is descending.
Definition: bulk_data_service.h:500
Compare m_compare
If it is integer type.
Definition: bulk_data_service.h:509
bool m_is_pk
true if column belongs to primary index (key or non-key)
Definition: bulk_data_service.h:494
uint16_t m_fixed_len
The length of column data if fixed.
Definition: bulk_data_service.h:522
std::string to_string() const
Get a string representation of Column_meta object.
Definition: bulk_data_service.h:655
bool is_integer() const
Definition: bulk_data_service.h:474
Compare
Data comparison method.
Definition: bulk_data_service.h:447
uint16_t m_field_index
Position of column in table.
Definition: bulk_data_service.h:531
uint16_t m_max_len
Maximum length of data in bytes.
Definition: bulk_data_service.h:525
bool m_is_fixed_len
If it is fixed length type.
Definition: bulk_data_service.h:506
bool m_is_key
true if column is a key for primary or secondary index.
Definition: bulk_data_service.h:497
uint16_t m_null_bit
BIT number in NULL bitmap.
Definition: bulk_data_service.h:537
bool can_be_stored_externally() const
Based on the column data type check if it can be stored externally.
Definition: bulk_data_service.h:637
bool m_fixed_len_if_set_in_row
Check the row header to find out if it is fixed length.
Definition: bulk_data_service.h:516
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:669
bool m_is_nullable
If column could be NULL.
Definition: bulk_data_service.h:491
bool m_is_part_of_sk
true if this column is part of secondary index.
Definition: bulk_data_service.h:485
bool m_is_unsigned
If it is unsigned integer type.
Definition: bulk_data_service.h:512
std::string get_compare_string() const
Definition: bulk_data_service.h:458
const void * m_charset
Character set for char & varchar columns.
Definition: bulk_data_service.h:540
Definition: bulk_data_service.h:208
uint64_t m_int_data
Column data in integer format.
Definition: bulk_data_service.h:241
std::string to_string() const
Definition: bulk_data_service.h:264
bool m_is_null
If column is NULL.
Definition: bulk_data_service.h:216
char * get_row_begin(const Row_meta &row_meta, size_t col_index) const
Get the pointer to the beginning of row.
Definition: bulk_data_service.h:812
char * m_data_ptr
Column data or row begin.
Definition: bulk_data_service.h:261
char * get_data() const
Definition: bulk_data_service.h:218
int16_t m_type
Column Data Type.
Definition: bulk_data_service.h:210
uint16_t m_data_len
Column data length.
Definition: bulk_data_service.h:213
void set_data(char *ptr)
Definition: bulk_data_service.h:220
void init()
Definition: bulk_data_service.h:243
void row(char *row_begin)
Save the beginning of the row pointer in this object.
Definition: bulk_data_service.h:225
Definition: bulk_data_service.h:89
bool is_null() const
Check if the column is null, by checking special value for length.
Definition: bulk_data_service.h:113
bool is_ext_relaxed() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:133
bool m_is_ext
If true, the column data is stored externally.
Definition: bulk_data_service.h:161
void set_null()
Mark the column to be null, by setting length to a special value.
Definition: bulk_data_service.h:106
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:190
const char * m_data_ptr
Column data.
Definition: bulk_data_service.h:91
bool is_row_id() const
Check if it is DB_ROW_ID column based on the value it contains.
Definition: bulk_data_service.h:98
uint64_t m_row_id
The generated DB_ROW_ID value.
Definition: bulk_data_service.h:101
void init()
Initialize the members.
Definition: bulk_data_service.h:145
void set_ext()
Mark that the column data has been stored externally.
Definition: bulk_data_service.h:139
bool is_ext() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:124
size_t m_data_len
Column data length.
Definition: bulk_data_service.h:94
std::string to_string() const
Definition: bulk_data_service.h:164
Definition: mysql.h:300
Row metadata.
Definition: bulk_data_service.h:702
const Column_meta & get_column_meta_index_order(size_t col_index) const
Get the metadata of the given column.
Definition: bulk_data_service.h:726
const Column_meta & get_column_meta(size_t col_index) const
Get the meta data of the column.
Definition: bulk_data_service.h:734
size_t m_n_blob_cols
Number of columns that can be stored externally.
Definition: bulk_data_service.h:771
size_t m_bitmap_length
Total bitmap header length for the row.
Definition: bulk_data_service.h:741
std::string to_string() const
Get a string representation of this Row_meta object.
Definition: bulk_data_service.h:799
bool dbrowid_is_pk
true if DB_ROW_ID is the pk, false otherwise.
Definition: bulk_data_service.h:780
size_t m_first_key_len
Length of the first key column.
Definition: bulk_data_service.h:748
size_t m_header_length
Total header length.
Definition: bulk_data_service.h:744
Key_type
Key type for fast comparison.
Definition: bulk_data_service.h:704
uint32_t m_non_keys
Number of columns not used in primary Key.
Definition: bulk_data_service.h:758
uint32_t m_num_columns
Total number of columns.
Definition: bulk_data_service.h:765
uint32_t m_keys
Number of columns used in primary key.
Definition: bulk_data_service.h:755
size_t m_key_length
Key length in bytes for non-integer keys.
Definition: bulk_data_service.h:752
std::string m_name
Name of the key.
Definition: bulk_data_service.h:774
std::vector< Column_meta > m_columns
All columns in a row are arranged with key columns first.
Definition: bulk_data_service.h:713
std::vector< const Column_meta * > m_columns_text_order
All columns in a row arranged as per col_index.
Definition: bulk_data_service.h:716
Key_type m_key_type
Key type for comparison.
Definition: bulk_data_service.h:761
size_t m_approx_row_len
Approximate row length.
Definition: bulk_data_service.h:768
bool is_pk
true if primary key, false if secondary key.
Definition: bulk_data_service.h:777
Definition: table.h:1450
Table metadata.
Definition: bulk_data_service.h:684
size_t m_keynr_pk
Key number of the primary key.
Definition: bulk_data_service.h:689
size_t max_row_id_value
Definition: bulk_data_service.h:695
bool dbrowid_is_pk
True if generated DB_ROW_ID is the pk.
Definition: bulk_data_service.h:692
std::string m_table_name
Table being bulk loaded.
Definition: bulk_data_service.h:698
size_t min_row_id_value
Definition: bulk_data_service.h:694
size_t m_n_keys
Number of keys/indexes the table has.
Definition: bulk_data_service.h:686