MySQL 9.5.0
Source Code Documentation
composite_iterators.h
Go to the documentation of this file.
1#ifndef SQL_ITERATORS_COMPOSITE_ITERATORS_H_
2#define SQL_ITERATORS_COMPOSITE_ITERATORS_H_
3
4/* Copyright (c) 2018, 2025, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file composite_iterators.h
29
30 A composite row iterator is one that takes in one or more existing iterators
31 and processes their rows in some interesting way. They are usually not bound
32 to a single table or similar, but are the inner (non-leaf) nodes of the
33 iterator execution tree. They consistently own their source iterator, although
34 not its memory (since we never allocate row iterators on the heap--usually on
 35 a MEM_ROOT). This means that in the end, you'll end up with a single root
36 iterator which then owns everything else recursively.
37
38 SortingIterator and the two window iterators are also composite iterators,
39 but are defined in their own files.
40 */
41
42#include <assert.h>
43#include <stddef.h>
44#include <stdint.h>
45#include <sys/types.h>
46#include <memory>
47#include <span>
48#include <string>
49#include <utility>
50#include <vector>
51
52#include "my_alloc.h"
53#include "my_base.h"
54#include "my_inttypes.h"
55#include "my_table_map.h"
58#include "sql/join_type.h"
59#include "sql/mem_root_array.h"
60#include "sql/pack_rows.h"
61#include "sql/sql_array.h"
62#include "sql_string.h"
63
64class Cached_item;
66class Item;
67class JOIN;
68class KEY;
70class SJ_TMP_TABLE;
71class Table_ref;
72class THD;
73class Table_function;
75struct TABLE;
76
77/**
78 An iterator that takes in a stream of rows and passes through only those that
79 meet some criteria (i.e., a condition evaluates to true). This is typically
80 used for WHERE/HAVING.
81 */
82class FilterIterator final : public RowIterator {
83 public:
85 Item *condition)
86 : RowIterator(thd), m_source(std::move(source)), m_condition(condition) {}
87
88 void SetNullRowFlag(bool is_null_row) override {
89 m_source->SetNullRowFlag(is_null_row);
90 }
91
92 void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
93 void EndPSIBatchModeIfStarted() override {
94 m_source->EndPSIBatchModeIfStarted();
95 }
96 void UnlockRow() override { m_source->UnlockRow(); }
97
98 private:
99 bool DoInit() override { return m_source->Init(); }
100 int DoRead() override;
101
104};
105
106/**
107 Handles LIMIT and/or OFFSET; Init() eats the first "offset" rows, and Read()
108 stops as soon as it's seen "limit" rows (including any skipped by offset).
109 */
110class LimitOffsetIterator final : public RowIterator {
111 public:
112 /**
113 @param thd Thread context
114 @param source Row source
115 @param limit Maximum number of rows to read, including the ones skipped by
116 offset. Can be HA_POS_ERROR for no limit.
117 @param offset Number of initial rows to skip. Can be 0 for no offset.
118 @param count_all_rows If true, the query will run to completion to get
119 more accurate numbers for skipped_rows, so you will not get any
120 performance benefits of early end.
121 @param reject_multiple_rows True if a derived table transformed from a
122 scalar subquery needs a run-time cardinality check
123 @param skipped_rows If not nullptr, is incremented for each row skipped by
124 offset or limit.
125 */
127 ha_rows limit, ha_rows offset, bool count_all_rows,
128 bool reject_multiple_rows, ha_rows *skipped_rows)
129 : RowIterator(thd),
130 m_source(std::move(source)),
131 m_limit(limit),
132 m_offset(offset),
133 m_count_all_rows(count_all_rows),
134 m_reject_multiple_rows(reject_multiple_rows),
135 m_skipped_rows(skipped_rows) {
136 if (count_all_rows) {
137 assert(m_skipped_rows != nullptr);
138 }
139 }
140
141 void SetNullRowFlag(bool is_null_row) override {
142 m_source->SetNullRowFlag(is_null_row);
143 }
144
145 void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
146 void EndPSIBatchModeIfStarted() override {
147 m_source->EndPSIBatchModeIfStarted();
148 }
149 void UnlockRow() override { m_source->UnlockRow(); }
150
151 private:
152 bool DoInit() override;
153 int DoRead() override;
154
156
157 // Note: The number of seen rows starts off at m_limit if we have OFFSET,
158 // which means we don't need separate LIMIT and OFFSET tests on the
159 // fast path of Read().
161
162 /**
163 Whether we have OFFSET rows that we still need to skip.
164 */
166
171};
172
173/**
174 Handles aggregation (typically used for GROUP BY) for the case where the rows
175 are already properly grouped coming in, ie., all rows that are supposed to be
176 part of the same group are adjacent in the input stream. (This could be
177 because they were sorted earlier, because we are scanning an index that
178 already gives us the rows in a group-compatible order, or because there is no
179 grouping.)
180
181 AggregateIterator needs to be able to save and restore rows; it doesn't know
182 when a group ends until it's seen the first row that is part of the _next_
183 group. When that happens, it needs to tuck away that next row, and then
184 restore the previous row so that the output row gets the correct grouped
185 values. A simple example, doing SELECT a, SUM(b) FROM t1 GROUP BY a:
186
187 t1.a t1.b SUM(b)
188 1 1 <-- first row, save it 1
189 1 2 3
190 1 3 6
191 2 1 <-- group changed, save row
192 [1 1] <-- restore first row, output 6
193 reset aggregate --> 0
194 [2 1] <-- restore new row, process it 1
195 2 10 11
196 <-- EOF, output 11
197
198 To save and restore rows like this, it uses the infrastructure from
199 pack_rows.h to pack and unpack all relevant rows into record[0] of every input
200 table. (Currently, there can only be one input table, but this may very well
201 change in the future.) It would be nice to have a more abstract concept of
202 sending a row around and taking copies of it if needed, as opposed to it
203 implicitly staying in the table's buffer. (This would also solve some
204 issues in EQRefIterator and when synthesizing NULL rows for outer joins.)
205 However, that's a large refactoring.
206 */
207class AggregateIterator final : public RowIterator {
208 public:
211 std::span<AccessPath *> single_row_index_lookups,
212 bool rollup);
213
214 void SetNullRowFlag(bool is_null_row) override {
215 m_source->SetNullRowFlag(is_null_row);
216 }
217
218 void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
219 void EndPSIBatchModeIfStarted() override {
220 m_source->EndPSIBatchModeIfStarted();
221 }
222 void UnlockRow() override {
223 // Most likely, HAVING failed. Ideally, we'd like to backtrack and
224 // unlock all rows that went into this aggregate, but we can't do that,
225 // and we also can't unlock the _current_ row, since that belongs to a
226 // different group. Thus, do nothing.
227 }
228
229 private:
230 bool DoInit() override;
231 int DoRead() override;
232
233 enum {
239
241
242 /**
243 The join we are part of. It would be nicer not to rely on this,
244 but we need a large number of members from there, like which
245 aggregate functions we have, the THD, temporary table parameters
246 and so on.
247 */
248 JOIN *m_join = nullptr;
249
250 /// Whether we have seen the last input row.
252
253 /**
254 Used to save NULL information in the specific case where we have
255 zero input rows.
256 */
258
259 /// Whether this is a rollup query.
260 const bool m_rollup;
261
262 /**
263 For rollup: The index of the first group item that did _not_ change when we
264 last switched groups. E.g., if we have group fields A,B,C,D and then switch
265 to group A,B,E,D, this value will become 1 (which means that we need
266 to output rollup rows for 2 -- A,B,E,NULL -- and then 1 -- A,B,NULL,NULL).
267 m_current_rollup_position will count down from the end until it becomes
268 less than this value.
269
270 If we do not have rollup, this value is perennially zero.
271 */
273
274 /**
275 If we are in state OUTPUTTING_ROLLUP_ROWS, where we are in the iteration.
276 This value will start at the index of the last group expression and then
277 count backwards down to and including m_last_unchanged_group_item_idx.
278 It is used to communicate to the rollup group items whether to turn
279 themselves into NULLs, and the sum items which of their sums to output.
280 */
282
283 /**
284 The list of tables we are reading from; they are the ones for which we need
285 to save and restore rows.
286 */
288
289 /// Packed version of the first row in the group we are currently processing.
291
292 /**
293 If applicable, packed version of the first row in the _next_ group. This is
294 used only in the LAST_ROW_STARTED_NEW_GROUP state; we just saw a row that
295 didn't belong to the current group, so we saved it here and went to output
296 a group. On the next Read() call, we need to process this deferred row
297 first of all.
298
299 Even when not in use, this string contains a buffer that is large enough to
300 pack a full row into, sans blobs. (If blobs are present,
301 StoreFromTableBuffers() will automatically allocate more space if needed.)
302 */
304
305 /// All the single-row index lookups that provide rows to this iterator.
306 std::span<AccessPath *> m_single_row_index_lookups;
307
308 /**
309 The slice we're setting when returning rows. See the comment in the
310 constructor.
311 */
313
314 void SetRollupLevel(int level);
315};
316
317/**
318 A simple nested loop join, taking in two iterators (left/outer and
319 right/inner) and joining them together. This may, of course, scan the inner
320 iterator many times. It is currently the only form of join we have.
321
322 The iterator works as a state machine, where the state records whether we need
323 to read a new outer row or not, and whether we've seen any rows from the inner
 324 iterator at all (if not, an outer join needs to synthesize a new NULL row).
325
326 The iterator takes care of activating performance schema batch mode on the
327 right iterator if needed; this is typically only used if it is the innermost
 328 table in the entire join (where the gains from turning on batch mode are the
 329 largest, and the accuracy loss from turning it off is the least critical).
330 */
331class NestedLoopIterator final : public RowIterator {
332 public:
336 JoinType join_type, bool pfs_batch_mode)
337 : RowIterator(thd),
338 m_source_outer(std::move(source_outer)),
339 m_source_inner(std::move(source_inner)),
341 m_pfs_batch_mode(pfs_batch_mode) {
342 assert(m_source_outer != nullptr);
343 assert(m_source_inner != nullptr);
344
345 // Batch mode makes no sense for anti- or semijoins, since they should only
346 // be reading one row.
348 assert(!pfs_batch_mode);
349 }
350 }
351
352 void SetNullRowFlag(bool is_null_row) override {
353 // TODO: write something here about why we can't do this lazily.
354 m_source_outer->SetNullRowFlag(is_null_row);
355 m_source_inner->SetNullRowFlag(is_null_row);
356 }
357
358 void EndPSIBatchModeIfStarted() override {
359 m_source_outer->EndPSIBatchModeIfStarted();
360 m_source_inner->EndPSIBatchModeIfStarted();
361 }
362
363 void UnlockRow() override {
 364 // Since we don't know which condition caused the row to be rejected,
365 // we can't know whether we could also unlock the outer row
366 // (it may still be used as parts of other joined rows).
368 m_source_inner->UnlockRow();
369 }
370 }
371
372 private:
373 bool DoInit() override;
374 int DoRead() override;
375
376 enum {
382
386
387 /** Whether to use batch mode when scanning the inner iterator. */
389};
390
391/**
392 An iterator that helps invalidating caches. Every time a row passes through it
393 or it changes state in any other way, it increments its “generation” counter.
394 This allows MaterializeIterator to see whether any of its dependencies has
395 changed, and then force a rematerialization -- this is typically used for
396 LATERAL tables, where we're joining in a derived table that depends on
397 something earlier in the join.
398 */
400 public:
403 const std::string &name)
404 : RowIterator(thd),
405 m_source_iterator(std::move(source_iterator)),
406 m_name(name) {}
407
408 private:
409 bool DoInit() override {
410 ++m_generation;
411 return m_source_iterator->Init();
412 }
413
414 int DoRead() override {
415 ++m_generation;
416 return m_source_iterator->Read();
417 }
418
419 public:
420 void SetNullRowFlag(bool is_null_row) override {
421 ++m_generation;
422 m_source_iterator->SetNullRowFlag(is_null_row);
423 }
424
425 void UnlockRow() override { m_source_iterator->UnlockRow(); }
426
427 int64_t generation() const { return m_generation; }
428 std::string name() const { return m_name; }
429
430 private:
432 int64_t m_generation = 0;
433 std::string m_name;
434};
435
437/**
438 An operand (query block) to be materialized by MaterializeIterator.
439 (@see MaterializeIterator for details.)
440*/
441struct Operand {
442 /// The iterator to read the actual rows from.
444
445 /// Used only for optimizer trace.
447
448 /// The JOIN that this query block represents. Used for performance
449 /// schema batch mode: When materializing a query block that consists of
450 /// a single table, MaterializeIterator needs to set up schema batch mode,
451 /// since there is no nested loop iterator to do it. (This is similar to
452 /// what ExecuteIteratorQuery() needs to do at the top level.)
454
455 /// If true, de-duplication checking via hash key is disabled
456 /// when materializing this query block (ie., we simply avoid calling
457 /// check_unique_fields() for each row). Used when materializing
458 /// UNION DISTINCT and UNION ALL parts into the same table.
459 /// We'd like to just use a unique constraint via unique index instead,
460 /// but there might be other indexes on the destination table
461 /// that we'd like to keep, and the implementation doesn't allow
462 /// disabling only one index.
463 ///
464 /// If you use this on a query block, doing_hash_deduplication()
465 /// must be true.
467
468 /// If set to false, the Field objects in the output row are
469 /// presumed already to be filled out. This is the case iff
470 /// there's a windowing iterator earlier in the chain.
472
473 /// The number of operands (i.e. blocks) involved in the set operation:
474 /// used for INTERSECT to determine if a value is present in all operands
476 /// The current operand (i.e. block) number, starting at zero. We use this
477 /// for INTERSECT and EXCEPT materialization operand.
479 /// Used for EXCEPT computation: the index of the first operand involved in
480 /// a N-ary except operation which has DISTINCT. This is significant for
481 /// calculating whether to set the counter to zero or just decrement it
482 /// when we see a right side operand.
484
485 /// If copy_items is true, used for copying the Field objects
486 /// into the temporary table row. Otherwise unused.
488
489 // Whether this query block is a recursive reference back to the
490 // output of the materialization.
492
493 // If is_recursive_reference is true, contains the FollowTailIterator
494 // in the query block (there can be at most one recursive reference
495 // in a join list, as per the SQL standard, so there should be exactly one).
496 // Used for informing the iterators about various shared state in the
497 // materialization (including coordinating rematerializations).
499
500 /// The estimated number of rows produced by this block
502};
503
504/**
 505 Create an iterator that materializes a set of rows into a temporary table
506 and sets up a (pre-existing) iterator to access that.
507 @see MaterializeIterator.
508
509 @param thd Thread handler.
510 @param operands List of operands (query blocks) to materialize.
511 @param path_params MaterializePath settings.
512 @param table_iterator Iterator used for accessing the temporary table
513 after materialization.
514 @param join
515 When materializing within the same JOIN (e.g., into a temporary table
516 before sorting), as opposed to a derived table or a CTE, we may need
517 to change the slice on the join before returning rows from the result
518 table. If so, join and ref_slice would need to be set, and
519 query_blocks_to_materialize should contain only one member, with the same
520 join.
521 @return the iterator.
522*/
525 const MaterializePathParameters *path_params,
527
528} // namespace materialize_iterator
529
531/**
532 Create an iterator that aggregates the output rows from another iterator
533 into a temporary table and then sets up a (pre-existing) iterator to
534 access the temporary table.
535 @see TemptableAggregateIterator.
536
537 @param thd Thread handler.
538 @param subquery_iterator input to aggregation.
539 @param temp_table_param temporary table settings.
540 @param table_iterator Iterator used for scanning the temporary table
541 after materialization.
542 @param table the temporary table.
543 @param join the JOIN in which we aggregate.
544 @param ref_slice the slice to set when accessing temporary table;
545 used if anything upstream wants to evaluate values based on its contents.
546 @return the iterator.
547*/
549 THD *thd, unique_ptr_destroy_only<RowIterator> subquery_iterator,
550 Temp_table_param *temp_table_param, TABLE *table,
552 int ref_slice);
553
554} // namespace temptable_aggregate_iterator
555
556/**
557 StreamingIterator is a minimal version of MaterializeIterator that does not
558 actually materialize; instead, every Read() just forwards the call to the
559 subquery iterator and does the required copying from one set of fields to
560 another.
561
562 It is used for when the optimizer would normally set up a materialization,
563 but you don't actually need one, ie. you don't want to read the rows multiple
564 times after writing them, and you don't want to access them by index (only
565 a single table scan). It also takes care of setting the NULL row flag
566 on the temporary table.
567 */
569 public:
570 /**
571 @param thd Thread handle.
572 @param subquery_iterator The iterator to read rows from.
573 @param temp_table_param Parameters for the temp table.
574 @param table The table we are streaming through. Will never actually
575 be written to, but its fields will be used.
576 @param provide_rowid If true, generate a row ID for each row we stream.
577 This is used if the parent needs row IDs for deduplication, in particular
578 weedout.
579 @param join See MaterializeIterator.
580 @param ref_slice See MaterializeIterator.
581 */
583 unique_ptr_destroy_only<RowIterator> subquery_iterator,
584 Temp_table_param *temp_table_param, TABLE *table,
585 bool provide_rowid, JOIN *join, int ref_slice);
586
587 void StartPSIBatchMode() override {
588 m_subquery_iterator->StartPSIBatchMode();
589 }
590 void EndPSIBatchModeIfStarted() override {
591 m_subquery_iterator->EndPSIBatchModeIfStarted();
592 }
593 void UnlockRow() override { m_subquery_iterator->UnlockRow(); }
594
595 private:
596 bool DoInit() override;
597 int DoRead() override;
601 JOIN *const m_join;
602 const int m_output_slice;
604
605 // Whether the iterator should generate and provide a row ID. Only true if the
606 // iterator is part of weedout, where the iterator will create a fake row ID
607 // to uniquely identify the rows it produces.
608 const bool m_provide_rowid;
609};
610
611/**
612 An iterator that wraps a Table_function (e.g. JSON_TABLE) and allows you to
613 iterate over the materialized temporary table. The table is materialized anew
614 for every Init().
615
616 TODO: Just wrapping it is probably not the optimal thing to do;
617 Table_function is highly oriented around materialization, but never caches.
618 Thus, perhaps we should rewrite Table_function to return a RowIterator
619 instead of going through a temporary table.
620 */
622 public:
624 THD *thd, Table_function *table_function, TABLE *table,
626
627 void SetNullRowFlag(bool is_null_row) override {
628 m_table_iterator->SetNullRowFlag(is_null_row);
629 }
630
631 void StartPSIBatchMode() override { m_table_iterator->StartPSIBatchMode(); }
632 void EndPSIBatchModeIfStarted() override {
633 m_table_iterator->EndPSIBatchModeIfStarted();
634 }
635
636 // The temporary table is private to us, so there's no need to worry about
637 // locks to other transactions.
638 void UnlockRow() override {}
639
640 private:
641 bool DoInit() override;
642 int DoRead() override { return m_table_iterator->Read(); }
643
645
647};
648
649/**
650 Like semijoin materialization, weedout works on the basic idea that a semijoin
 651 is just like an inner join as long as we can get rid of the duplicates
652 somehow. (This is advantageous, because inner joins can be reordered, whereas
653 semijoins generally can't.) However, unlike semijoin materialization, weedout
654 removes duplicates after the join, not before it. Consider something like
655
656 SELECT * FROM t1 WHERE a IN ( SELECT b FROM t2 );
657
658 Semijoin materialization solves this by materializing t2, with deduplication,
659 and then joining. Weedout joins t1 to t2 and then leaves only one output row
660 per t1 row. The disadvantage is that this potentially needs to discard more
661 rows; the (potential) advantage is that we deduplicate on t1 instead of t2.
662
663 Weedout, unlike materialization, works in a streaming fashion; rows are output
664 (or discarded) as they come in, with a temporary table used for recording the
665 row IDs we've seen before. (We need to deduplicate on t1's row IDs, not its
666 contents.) See SJ_TMP_TABLE for details about the table format.
667 */
668class WeedoutIterator final : public RowIterator {
669 public:
671 SJ_TMP_TABLE *sj, table_map tables_to_get_rowid_for);
672
673 void SetNullRowFlag(bool is_null_row) override {
674 m_source->SetNullRowFlag(is_null_row);
675 }
676
677 void EndPSIBatchModeIfStarted() override {
678 m_source->EndPSIBatchModeIfStarted();
679 }
680 void UnlockRow() override { m_source->UnlockRow(); }
681
682 private:
683 bool DoInit() override;
684 int DoRead() override;
688};
689
690/**
691 An iterator that removes consecutive rows that are the same according to
692 a set of items (typically the join key), so-called “loose scan”
693 (not to be confused with “loose index scan”, which is made by the
694 range optimizer). This is similar in spirit to WeedoutIterator above
695 (removing duplicates allows us to treat the semijoin as a normal join),
696 but is much cheaper if the data is already ordered/grouped correctly,
697 as the removal can happen before the join, and it does not need a
698 temporary table.
699 */
701 public:
704 JOIN *join, std::span<Item *> group_items);
705
706 void SetNullRowFlag(bool is_null_row) override {
707 m_source->SetNullRowFlag(is_null_row);
708 }
709
710 void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
711 void EndPSIBatchModeIfStarted() override {
712 m_source->EndPSIBatchModeIfStarted();
713 }
714 void UnlockRow() override { m_source->UnlockRow(); }
715
716 private:
717 bool DoInit() override;
718 int DoRead() override;
722};
723
724/**
725 Much like RemoveDuplicatesIterator, but works on the basis of a given index
726 (or more accurately, its keypart), not an arbitrary list of grouped fields.
727 This is only used in the non-hypergraph optimizer; the hypergraph optimizer
728 can deal with groupings that come from e.g. sorts.
729 */
731 public:
734 const TABLE *table, KEY *key, size_t key_len);
735
736 void SetNullRowFlag(bool is_null_row) override {
737 m_source->SetNullRowFlag(is_null_row);
738 }
739
740 void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
741 void EndPSIBatchModeIfStarted() override {
742 m_source->EndPSIBatchModeIfStarted();
743 }
744 void UnlockRow() override { m_source->UnlockRow(); }
745
746 private:
747 bool DoInit() override;
748 int DoRead() override;
749
753 uchar *m_key_buf; // Owned by the THD's MEM_ROOT.
754 const size_t m_key_len;
756};
757
758/**
759 An iterator that is semantically equivalent to a semijoin NestedLoopIterator
760 immediately followed by a RemoveDuplicatesOnIndexIterator. It is used to
761 implement the “loose scan” strategy in queries with multiple tables on the
762 inside of a semijoin, like
763
764 ... FROM t1 WHERE ... IN ( SELECT ... FROM t2 JOIN t3 ... )
765
766 In this case, the query tree without this iterator would ostensibly look like
767
768 -> Nested loop join
769 -> Table scan on t1
770 -> Remove duplicates on t2_idx
771 -> Nested loop semijoin
772 -> Index scan on t2 using t2_idx
773 -> Filter (e.g. t3.a = t2.a)
774 -> Table scan on t3
775
776 (t3 will be marked as “first match” on t2 when implementing loose scan,
777 thus the semijoin.)
778
779 First note that we can't put the duplicate removal directly on t2 in this
780 case, as the first t2 row doesn't necessarily match anything in t3, so it
781 needs to be above. However, this is wasteful, because once we find a matching
782 t2/t3 pair, we should stop scanning t3 until we have a new t2.
783
784 NestedLoopSemiJoinWithDuplicateRemovalIterator solves the problem by doing
785 exactly this; it gets a row from the outer side, gets exactly one row from the
786 inner side, and then skips over rows from the outer side (_without_ scanning
787 the inner side) until its keypart changes.
788 */
790 : public RowIterator {
791 public:
795 KEY *key, size_t key_len);
796
797 void SetNullRowFlag(bool is_null_row) override {
798 m_source_outer->SetNullRowFlag(is_null_row);
799 m_source_inner->SetNullRowFlag(is_null_row);
800 }
801
802 void EndPSIBatchModeIfStarted() override {
803 m_source_outer->EndPSIBatchModeIfStarted();
804 m_source_inner->EndPSIBatchModeIfStarted();
805 }
806
807 void UnlockRow() override {
808 m_source_outer->UnlockRow();
809 m_source_inner->UnlockRow();
810 }
811
812 private:
813 bool DoInit() override;
814 int DoRead() override;
815
818
821 uchar *m_key_buf; // Owned by the THD's MEM_ROOT.
822 const size_t m_key_len;
824};
825
826/**
827 MaterializeInformationSchemaTableIterator makes sure a given I_S temporary
828 table is materialized (filled out) before we try to scan it.
829 */
831 public:
834 Table_ref *table_list, Item *condition);
835
836 void SetNullRowFlag(bool is_null_row) override {
837 m_table_iterator->SetNullRowFlag(is_null_row);
838 }
839
840 void StartPSIBatchMode() override { m_table_iterator->StartPSIBatchMode(); }
841 void EndPSIBatchModeIfStarted() override {
842 m_table_iterator->EndPSIBatchModeIfStarted();
843 }
844
845 // The temporary table is private to us, so there's no need to worry about
846 // locks to other transactions.
847 void UnlockRow() override {}
848
849 private:
850 bool DoInit() override;
851 int DoRead() override { return m_table_iterator->Read(); }
852
853 /// The iterator that reads from the materialized table.
857};
858
859/**
 860 Takes in two or more iterators and outputs rows from them sequentially
 861 (first all rows from the first one, then all from the second one, etc.).
862 Used for implementing UNION ALL, typically together with StreamingIterator.
863 */
864class AppendIterator final : public RowIterator {
865 public:
867 THD *thd,
869
870 void StartPSIBatchMode() override;
871 void EndPSIBatchModeIfStarted() override;
872
873 void SetNullRowFlag(bool is_null_row) override;
874 void UnlockRow() override;
875
876 private:
877 bool DoInit() override;
878 int DoRead() override;
879
880 std::vector<unique_ptr_destroy_only<RowIterator>> m_sub_iterators;
883};
884
885#endif // SQL_ITERATORS_COMPOSITE_ITERATORS_H_
Handles aggregation (typically used for GROUP BY) for the case where the rows are already properly gr...
Definition: composite_iterators.h:207
void UnlockRow() override
Definition: composite_iterators.h:222
int m_current_rollup_position
If we are in state OUTPUTTING_ROLLUP_ROWS, where we are in the iteration.
Definition: composite_iterators.h:281
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.h:218
JOIN * m_join
The join we are part of.
Definition: composite_iterators.h:248
bool m_seen_eof
Whether we have seen the last input row.
Definition: composite_iterators.h:251
int DoRead() override
Definition: composite_iterators.cc:261
AggregateIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source, JOIN *join, pack_rows::TableCollection tables, std::span< AccessPath * > single_row_index_lookups, bool rollup)
Definition: composite_iterators.cc:197
bool DoInit() override
Definition: composite_iterators.cc:217
pack_rows::TableCollection m_tables
The list of tables we are reading from; they are the ones for which we need to save and restore rows.
Definition: composite_iterators.h:287
@ LAST_ROW_STARTED_NEW_GROUP
Definition: composite_iterators.h:235
@ READING_FIRST_ROW
Definition: composite_iterators.h:234
@ OUTPUTTING_ROLLUP_ROWS
Definition: composite_iterators.h:236
@ DONE_OUTPUTTING_ROWS
Definition: composite_iterators.h:237
String m_first_row_this_group
Packed version of the first row in the group we are currently processing.
Definition: composite_iterators.h:290
String m_first_row_next_group
If applicable, packed version of the first row in the next group.
Definition: composite_iterators.h:303
table_map m_save_nullinfo
Used to save NULL information in the specific case where we have zero input rows.
Definition: composite_iterators.h:257
enum AggregateIterator::@65 m_state
unique_ptr_destroy_only< RowIterator > m_source
Definition: composite_iterators.h:240
int m_output_slice
The slice we're setting when returning rows.
Definition: composite_iterators.h:312
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:219
const bool m_rollup
Whether this is a rollup query.
Definition: composite_iterators.h:260
std::span< AccessPath * > m_single_row_index_lookups
All the single-row index lookups that provide rows to this iterator.
Definition: composite_iterators.h:306
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:214
void SetRollupLevel(int level)
Definition: composite_iterators.cc:477
int m_last_unchanged_group_item_idx
For rollup: The index of the first group item that did not change when we last switched groups.
Definition: composite_iterators.h:272
Takes in two or more iterators and output rows from them sequentially (first all rows from the first ...
Definition: composite_iterators.h:864
size_t m_current_iterator_index
Definition: composite_iterators.h:881
AppendIterator(THD *thd, std::vector< unique_ptr_destroy_only< RowIterator > > &&sub_iterators)
Definition: composite_iterators.cc:4500
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.cc:4537
int DoRead() override
Definition: composite_iterators.cc:4512
bool DoInit() override
Definition: composite_iterators.cc:4506
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.cc:4547
void UnlockRow() override
Definition: composite_iterators.cc:4555
std::vector< unique_ptr_destroy_only< RowIterator > > m_sub_iterators
Definition: composite_iterators.h:880
bool m_pfs_batch_mode_enabled
Definition: composite_iterators.h:882
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.cc:4542
A wrapper class which provides array bounds checking.
Definition: sql_array.h:48
An iterator that helps invalidating caches.
Definition: composite_iterators.h:399
CacheInvalidatorIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source_iterator, const std::string &name)
Definition: composite_iterators.h:401
int DoRead() override
Definition: composite_iterators.h:414
std::string m_name
Definition: composite_iterators.h:433
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:420
unique_ptr_destroy_only< RowIterator > m_source_iterator
Definition: composite_iterators.h:431
void UnlockRow() override
Definition: composite_iterators.h:425
bool DoInit() override
Definition: composite_iterators.h:409
int64_t m_generation
Definition: composite_iterators.h:432
std::string name() const
Definition: composite_iterators.h:428
int64_t generation() const
Definition: composite_iterators.h:427
This is used for segregating rows in groups (e.g.
Definition: item.h:6572
An iterator that takes in a stream of rows and passes through only those that meet some criteria (i....
Definition: composite_iterators.h:82
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:93
void UnlockRow() override
Definition: composite_iterators.h:96
int DoRead() override
Definition: composite_iterators.cc:96
unique_ptr_destroy_only< RowIterator > m_source
Definition: composite_iterators.h:102
FilterIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source, Item *condition)
Definition: composite_iterators.h:84
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.h:92
Item * m_condition
Definition: composite_iterators.h:103
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:88
bool DoInit() override
Definition: composite_iterators.h:99
FollowTailIterator is a special version of TableScanIterator that is used as part of WITH RECURSIVE q...
Definition: basic_row_iterators.h:476
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:928
Definition: sql_optimizer.h:133
Definition: key.h:113
Handles LIMIT and/or OFFSET; Init() eats the first "offset" rows, and Read() stops as soon as it's se...
Definition: composite_iterators.h:110
void UnlockRow() override
Definition: composite_iterators.h:149
LimitOffsetIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source, ha_rows limit, ha_rows offset, bool count_all_rows, bool reject_multiple_rows, ha_rows *skipped_rows)
Definition: composite_iterators.h:126
ha_rows m_seen_rows
Definition: composite_iterators.h:160
const bool m_count_all_rows
Definition: composite_iterators.h:168
ha_rows * m_skipped_rows
Definition: composite_iterators.h:170
int DoRead() override
Definition: composite_iterators.cc:135
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:146
const ha_rows m_limit
Definition: composite_iterators.h:167
const ha_rows m_offset
Definition: composite_iterators.h:167
bool m_needs_offset
Whether we have OFFSET rows that we still need to skip.
Definition: composite_iterators.h:165
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:141
unique_ptr_destroy_only< RowIterator > m_source
Definition: composite_iterators.h:155
const bool m_reject_multiple_rows
Definition: composite_iterators.h:169
bool DoInit() override
Definition: composite_iterators.cc:121
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.h:145
MaterializeInformationSchemaTableIterator makes sure a given I_S temporary table is materialized (fil...
Definition: composite_iterators.h:830
Item * m_condition
Definition: composite_iterators.h:856
void UnlockRow() override
Definition: composite_iterators.h:847
int DoRead() override
Definition: composite_iterators.h:851
MaterializeInformationSchemaTableIterator(THD *thd, unique_ptr_destroy_only< RowIterator > table_iterator, Table_ref *table_list, Item *condition)
Definition: composite_iterators.cc:4475
Table_ref * m_table_list
Definition: composite_iterators.h:855
bool DoInit() override
Definition: composite_iterators.cc:4483
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:836
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:841
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.h:840
unique_ptr_destroy_only< RowIterator > m_table_iterator
The iterator that reads from the materialized table.
Definition: composite_iterators.h:854
An iterator that wraps a Table_function (e.g.
Definition: composite_iterators.h:621
Table_function * m_table_function
Definition: composite_iterators.h:646
unique_ptr_destroy_only< RowIterator > m_table_iterator
Definition: composite_iterators.h:644
int DoRead() override
Definition: composite_iterators.h:642
bool DoInit() override
Definition: composite_iterators.cc:4236
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.h:631
void UnlockRow() override
Definition: composite_iterators.h:638
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:627
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:632
MaterializedTableFunctionIterator(THD *thd, Table_function *table_function, TABLE *table, unique_ptr_destroy_only< RowIterator > table_iterator)
Definition: composite_iterators.cc:4229
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:432
A simple nested loop join, taking in two iterators (left/outer and right/inner) and joining them toge...
Definition: composite_iterators.h:331
int DoRead() override
Definition: composite_iterators.cc:500
void UnlockRow() override
Definition: composite_iterators.h:363
@ END_OF_ROWS
Definition: composite_iterators.h:380
@ READING_INNER_ROWS
Definition: composite_iterators.h:379
@ NEEDS_OUTER_ROW
Definition: composite_iterators.h:377
@ READING_FIRST_INNER_ROW
Definition: composite_iterators.h:378
const bool m_pfs_batch_mode
Whether to use batch mode when scanning the inner iterator.
Definition: composite_iterators.h:388
NestedLoopIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source_outer, unique_ptr_destroy_only< RowIterator > source_inner, JoinType join_type, bool pfs_batch_mode)
Definition: composite_iterators.h:333
unique_ptr_destroy_only< RowIterator > const m_source_inner
Definition: composite_iterators.h:384
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:358
const JoinType m_join_type
Definition: composite_iterators.h:385
bool DoInit() override
Definition: composite_iterators.cc:489
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:352
unique_ptr_destroy_only< RowIterator > const m_source_outer
Definition: composite_iterators.h:383
enum NestedLoopIterator::@66 m_state
An iterator that is semantically equivalent to a semijoin NestedLoopIterator immediately followed by ...
Definition: composite_iterators.h:790
void UnlockRow() override
Definition: composite_iterators.h:807
int DoRead() override
Definition: composite_iterators.cc:4418
KEY * m_key
Definition: composite_iterators.h:820
unique_ptr_destroy_only< RowIterator > const m_source_outer
Definition: composite_iterators.h:816
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:797
const size_t m_key_len
Definition: composite_iterators.h:822
bool m_deduplicate_against_previous_row
Definition: composite_iterators.h:823
bool DoInit() override
Definition: composite_iterators.cc:4410
uchar * m_key_buf
Definition: composite_iterators.h:821
unique_ptr_destroy_only< RowIterator > const m_source_inner
Definition: composite_iterators.h:817
NestedLoopSemiJoinWithDuplicateRemovalIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source_outer, unique_ptr_destroy_only< RowIterator > source_inner, const TABLE *table, KEY *key, size_t key_len)
Definition: composite_iterators.cc:4395
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:802
const TABLE * m_table_outer
Definition: composite_iterators.h:819
An iterator that removes consecutive rows that are the same according to a set of items (typically th...
Definition: composite_iterators.h:700
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.h:710
RemoveDuplicatesIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source, JOIN *join, std::span< Item * > group_items)
Definition: composite_iterators.cc:4310
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:711
void UnlockRow() override
Definition: composite_iterators.h:714
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:706
bool m_first_row
Definition: composite_iterators.h:721
bool DoInit() override
Definition: composite_iterators.cc:4322
Bounds_checked_array< Cached_item * > m_caches
Definition: composite_iterators.h:720
int DoRead() override
Definition: composite_iterators.cc:4327
unique_ptr_destroy_only< RowIterator > m_source
Definition: composite_iterators.h:719
Much like RemoveDuplicatesIterator, but works on the basis of a given index (or more accurately,...
Definition: composite_iterators.h:730
int DoRead() override
Definition: composite_iterators.cc:4369
void UnlockRow() override
Definition: composite_iterators.h:744
uchar * m_key_buf
Definition: composite_iterators.h:753
bool m_first_row
Definition: composite_iterators.h:755
const TABLE * m_table
Definition: composite_iterators.h:751
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.h:740
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:736
const size_t m_key_len
Definition: composite_iterators.h:754
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:741
RemoveDuplicatesOnIndexIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source, const TABLE *table, KEY *key, size_t key_len)
Definition: composite_iterators.cc:4354
KEY * m_key
Definition: composite_iterators.h:752
unique_ptr_destroy_only< RowIterator > m_source
Definition: composite_iterators.h:750
bool DoInit() override
Definition: composite_iterators.cc:4364
A context for reading through a single table using a chosen access method: index read,...
Definition: row_iterator.h:82
THD * thd() const
Definition: row_iterator.h:255
Definition: sql_executor.h:95
StreamingIterator is a minimal version of MaterializeIterator that does not actually materialize; ins...
Definition: composite_iterators.h:568
bool DoInit() override
Definition: composite_iterators.cc:3795
JOIN *const m_join
Definition: composite_iterators.h:601
void UnlockRow() override
Definition: composite_iterators.h:593
int DoRead() override
Definition: composite_iterators.cc:3818
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: composite_iterators.h:587
StreamingIterator(THD *thd, unique_ptr_destroy_only< RowIterator > subquery_iterator, Temp_table_param *temp_table_param, TABLE *table, bool provide_rowid, JOIN *join, int ref_slice)
Definition: composite_iterators.cc:3766
Temp_table_param * m_temp_table_param
Definition: composite_iterators.h:599
const bool m_provide_rowid
Definition: composite_iterators.h:608
const int m_output_slice
Definition: composite_iterators.h:602
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:590
int m_input_slice
Definition: composite_iterators.h:603
unique_ptr_destroy_only< RowIterator > m_subquery_iterator
Definition: composite_iterators.h:598
ha_rows m_row_number
Definition: composite_iterators.h:600
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:169
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Definition: row_iterator.h:267
TABLE * table() const
Definition: row_iterator.h:279
Class representing a table function.
Definition: table_function.h:53
Definition: table.h:2933
Object containing parameters used when creating and using temporary tables.
Definition: temp_table_param.h:97
Like semijoin materialization, weedout works on the basic idea that a semijoin is just like an inner ...
Definition: composite_iterators.h:668
bool DoInit() override
Definition: composite_iterators.cc:4262
unique_ptr_destroy_only< RowIterator > m_source
Definition: composite_iterators.h:685
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: composite_iterators.h:677
WeedoutIterator(THD *thd, unique_ptr_destroy_only< RowIterator > source, SJ_TMP_TABLE *sj, table_map tables_to_get_rowid_for)
Definition: composite_iterators.cc:4249
const table_map m_tables_to_get_rowid_for
Definition: composite_iterators.h:687
int DoRead() override
Definition: composite_iterators.cc:4279
SJ_TMP_TABLE * m_sj
Definition: composite_iterators.h:686
void UnlockRow() override
Definition: composite_iterators.h:680
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: composite_iterators.h:673
A structure that contains a list of input tables for a hash join operation, BKA join operation or a s...
Definition: pack_rows.h:84
JoinType
Definition: join_type.h:28
@ ANTI
Left antijoin, i.e.
@ SEMI
Left semijoin, i.e.
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
std::unique_ptr< T, Destroy_only< T > > unique_ptr_destroy_only
std::unique_ptr, but only destroying.
Definition: my_alloc.h:480
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1217
Some integer typedefs for easier portability.
unsigned long long int ulonglong
Definition: my_inttypes.h:56
unsigned char uchar
Definition: my_inttypes.h:52
uint64_t table_map
Definition: my_table_map.h:30
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
Definition: composite_iterators.h:436
RowIterator * CreateIterator(THD *thd, Mem_root_array< materialize_iterator::Operand > operands, const MaterializePathParameters *path_params, unique_ptr_destroy_only< RowIterator > table_iterator, JOIN *join)
Create an iterator that materializes a set of row into a temporary table and sets up a (pre-existing)...
Definition: composite_iterators.cc:3743
std::string join(const detail::range auto &rng, std::string_view delim)
join elements of a range into a string separated by a delimiter.
Definition: string.h:74
Definition: gcs_xcom_synode.h:64
Definition: composite_iterators.h:530
RowIterator * CreateIterator(THD *thd, unique_ptr_destroy_only< RowIterator > subquery_iterator, Temp_table_param *temp_table_param, TABLE *table, unique_ptr_destroy_only< RowIterator > table_iterator, JOIN *join, int ref_slice)
Create an iterator that aggregates the output rows from another iterator into a temporary table and t...
Definition: composite_iterators.cc:4201
std::vector< T, ut::allocator< T > > vector
Specialization of vector which uses allocator.
Definition: ut0new.h:2880
Generic routines for packing rows (possibly from multiple tables at the same time) into strings,...
required string key
Definition: replication_asynchronous_connection_failover.proto:60
repeated Source source
Definition: replication_asynchronous_connection_failover.proto:42
join_type
Definition: sql_opt_exec_shared.h:186
Our own string classes, used pervasively throughout the executor.
Definition: materialize_path_parameters.h:40
Definition: table.h:1435
An operand (query block) to be materialized by MaterializeIterator.
Definition: composite_iterators.h:441
unique_ptr_destroy_only< RowIterator > subquery_iterator
The iterator to read the actual rows from.
Definition: composite_iterators.h:443
bool copy_items
If set to false, the Field objects in the output row are presumed already to be filled out.
Definition: composite_iterators.h:471
Temp_table_param * temp_table_param
If copy_items is true, used for copying the Field objects into the temporary table row.
Definition: composite_iterators.h:487
double m_estimated_output_rows
The estimated number of rows produced by this block.
Definition: composite_iterators.h:501
ulonglong m_operand_idx
The current operand (i.e.
Definition: composite_iterators.h:478
bool is_recursive_reference
Definition: composite_iterators.h:491
FollowTailIterator * recursive_reader
Definition: composite_iterators.h:498
int select_number
Used only for optimizer trace.
Definition: composite_iterators.h:446
ulonglong m_total_operands
The number of operands (i.e.
Definition: composite_iterators.h:475
uint m_first_distinct
Used for EXCEPT computation: the index of the first operand involved in a N-ary except operation whic...
Definition: composite_iterators.h:483
bool disable_deduplication_by_hash_field
If true, de-duplication checking via hash key is disabled when materializing this query block (ie....
Definition: composite_iterators.h:466
JOIN * join
The JOIN that this query block represents.
Definition: composite_iterators.h:453