MySQL 9.5.0
Source Code Documentation
row_iterator.h
Go to the documentation of this file.
1#ifndef SQL_ITERATORS_ROW_ITERATOR_H_
2#define SQL_ITERATORS_ROW_ITERATOR_H_
3
4/* Copyright (c) 2018, 2025, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27#include <assert.h>
28#include <cstdint>
29
30class Item;
31class JOIN;
32class THD;
33struct TABLE;
34
/**
  Profiling data for an iterator, needed by 'EXPLAIN ANALYZE'.
  Note that an iterator may be iterated over multiple times, e.g. if it is
  the inner operand of a nested loop join. This is denoted 'loops'
  below, and the metrics in this class are aggregated values for all loops.

  This is a pure interface: every metric accessor is pure virtual and must
  be provided by the implementing class.
*/
class IteratorProfiler {
 public:
  /** Time (in ms) spent fetching the first row. (Sum for all loops.) */
  virtual double GetFirstRowMs() const = 0;

  /** Time (in ms) spent fetching the remaining rows. (Sum for all loops.) */
  virtual double GetLastRowMs() const = 0;

  /** The number of loops (i.e. the number of iterator->Init() calls). */
  virtual uint64_t GetNumInitCalls() const = 0;

  /** The number of rows fetched. (Sum for all loops.) */
  virtual uint64_t GetNumRows() const = 0;

  /** Virtual, so that implementations can be destroyed through this type. */
  virtual ~IteratorProfiler() = default;
};
56
57/**
58 A context for reading through a single table using a chosen access method:
59 index read, scan, etc, use of cache, etc.. It is mostly meant as an interface,
60 but also contains some private member functions that are useful for many
61 implementations, such as error handling.
62
63 A RowIterator is a simple iterator; you initialize it, and then read one
64 record at a time until Read() returns EOF. A RowIterator can read from
65 other Iterators if you want to, e.g., SortingIterator, which takes in records
66 from another RowIterator and sorts them.
67
68 The abstraction is not completely tight. In particular, it still leaves some
69 specifics to TABLE, such as which columns to read (the read_set). This means
70 it would probably be hard as-is to e.g. sort a join of two tables.
71
72 Use by:
73@code
74 unique_ptr<RowIterator> iterator(new ...);
75 if (iterator->Init())
76 return true;
77 while (iterator->Read() == 0) {
78 ...
79 }
80@endcode
81 */
83 public:
84 // NOTE: Iterators should typically be instantiated using NewIterator,
85 // in sql/iterators/timing_iterator.h.
86 explicit RowIterator(THD *thd) : m_thd(thd) {}
87 virtual ~RowIterator() = default;
88
89 RowIterator(const RowIterator &) = delete;
90
91 // Default move ctor used by IndexRangeScanIterator.
92 RowIterator(RowIterator &&) = default;
93
94 /**
95 Initialize or reinitialize the iterator. You must always call Init()
96 before trying a Read() (but Init() does not imply Read()).
97
98 You can call Init() multiple times; subsequent calls will rewind the
99 iterator (or reposition it, depending on whether the iterator takes in
100 e.g. a Index_lookup) and allow you to read the records anew.
101
102 Subclasses should implement DoInit() to do the actual initalization of the
103 iterator.
104 */
105 bool Init() {
107 return DoInit();
108 }
109
110 /**
111 Read a single row. The row data is not actually returned from the function;
112 it is put in the table's (or tables', in case of a join) record buffer, ie.,
113 table->records[0].
114
115 Subclasses should override DoRead() to do the actual reading of the row.
116
117 @retval
118 0 OK
119 @retval
120 -1 End of records
121 @retval
122 1 Error
123 */
124 int Read() {
125 const int error = DoRead();
126 if (error == 0) {
127 ++m_num_rows;
128 } else if (error == -1) {
130 }
131 return error;
132 }
133
134 /**
135 Mark the current row buffer as containing a NULL row or not, so that if you
136 read from it and the flag is true, you'll get only NULLs no matter what is
137 actually in the buffer (typically some old leftover row). This is used
138 for outer joins, when an iterator hasn't produced any rows and we need to
139 produce a NULL-complemented row. Init() or Read() won't necessarily
140 reset this flag, so if you ever set is to true, make sure to also set it
141 to false when needed.
142
143 Note that this can be called without Init() having been called first.
144 For example, NestedLoopIterator can hit EOF immediately on the outer
145 iterator, which means the inner iterator doesn't get an Init() call,
146 but will still forward SetNullRowFlag to both inner and outer iterators.
147
148 TODO: We shouldn't need this. See the comments on AggregateIterator for
149 a bit more discussion on abstracting out a row interface.
150 */
151 virtual void SetNullRowFlag(bool is_null_row) = 0;
152
153 // In certain queries, such as SELECT FOR UPDATE, UPDATE or DELETE queries,
154 // reading rows will automatically take locks on them. (This means that the
155 // set of locks taken will depend on whether e.g. the optimizer chose a table
156 // scan or used an index, due to InnoDB's row locking scheme with “gap locks”
157 // for B-trees instead of full predicate locks.)
158 //
159 // However, under some transaction isolation levels (READ COMMITTED or
160 // less strict), it is possible to release such locks if and only if the row
161 // failed a WHERE predicate, as only the returned rows are protected,
162 // not _which_ rows are returned. Thus, if Read() returned a row that you did
163 // not actually use, you should call UnlockRow() afterwards, which allows the
164 // storage engine to release the row lock in such situations.
165 //
166 // TableRowIterator has a default implementation of this; other iterators
167 // should usually either forward the call to their source iterator (if any)
168 // or just ignore it. The right behavior depends on the iterator.
169 virtual void UnlockRow() = 0;
170
171 /** Get profiling data for this iterator (for 'EXPLAIN ANALYZE').*/
172 virtual const IteratorProfiler *GetProfiler() const {
173 /**
174 Valid for TimingIterator, MaterializeIterator and
175 TemptableAggregateIterator only.
176 */
177 assert(false);
178 return nullptr;
179 }
180
181 /** @see TimingIterator .*/
183 [[maybe_unused]] const IteratorProfiler *profiler) {
184 // Valid for TimingIterator only.
185 assert(false);
186 }
187
188 /**
189 Start performance schema batch mode, if supported (otherwise ignored).
190
191 PFS batch mode is a mitigation to reduce the overhead of performance schema,
192 typically applied at the innermost table of the entire join. If you start
193 it before scanning the table and then end it afterwards, the entire set
194 of handler calls will be timed only once, as a group, and the costs will
195 be distributed evenly out. This reduces timer overhead.
196
197 If you start PFS batch mode, you must also take care to end it at the
198 end of the scan, one way or the other. Do note that this is true even
199 if the query ends abruptly (LIMIT is reached, or an error happens).
200 The easiest workaround for this is to simply call EndPSIBatchModeIfStarted()
201 on the root iterator at the end of the scan. See the PFSBatchMode class for
202 a useful helper.
203
204 The rules for starting batch and ending mode are:
205
206 1. If you are an iterator with exactly one child (FilterIterator etc.),
207 forward any StartPSIBatchMode() calls to it.
208 2. If you drive an iterator (read rows from it using a for loop
209 or similar), use PFSBatchMode as described above.
210 3. If you have multiple children, ignore the call and do your own
211 handling of batch mode as appropriate. For materialization,
212 #2 would typically apply. For joins, it depends on the join type
213 (e.g., NestedLoopIterator applies batch mode only when scanning
214 the innermost table).
215
216 The upshot of this is that when scanning a single table, batch mode
217 will typically be activated for that table (since we call
218 StartPSIBatchMode() on the root iterator, and it will trickle all the way
219 down to the table iterator), but for a join, the call will be ignored
220 and the join iterator will activate batch mode by itself as needed.
221 */
222 virtual void StartPSIBatchMode() {}
223
224 /**
225 Ends performance schema batch mode, if started. It's always safe to
226 call this.
227
228 Iterators that have children (composite iterators) must forward the
229 EndPSIBatchModeIfStarted() call to every iterator they could conceivably
230 have called StartPSIBatchMode() on. This ensures that after such a call
231 to on the root iterator, all handlers are out of batch mode.
232 */
233 virtual void EndPSIBatchModeIfStarted() {}
234
235 /**
236 If this iterator is wrapping a different iterator (e.g. TimingIterator<T>)
237 and you need to down_cast<> to a specific iterator type, this allows getting
238 at the wrapped iterator.
239 */
240 virtual RowIterator *real_iterator() { return this; }
241 virtual const RowIterator *real_iterator() const { return this; }
242
243 /// Returns the number of times Init() has been called on this iterator.
244 uint64_t num_init_calls() const { return m_num_init_calls; }
245
246 /// Returns the number of times Read() has returned a row successfully from
247 /// this iterator.
248 uint64_t num_rows() const { return m_num_rows; }
249
250 /// Returns the number of times the iterator has been fully read. That is, the
251 /// number of times Read() has returned EOF.
252 uint64_t num_full_reads() const { return m_num_full_reads; }
253
254 protected:
255 THD *thd() const { return m_thd; }
256
257 private:
258 virtual bool DoInit() = 0;
259 virtual int DoRead() = 0;
260
261 THD *const m_thd;
262 uint64_t m_num_init_calls{0};
263 uint64_t m_num_rows{0};
264 uint64_t m_num_full_reads{0};
265};
266
268 public:
270
271 void UnlockRow() override;
272 void SetNullRowFlag(bool is_null_row) override;
273 void StartPSIBatchMode() override;
274 void EndPSIBatchModeIfStarted() override;
275
276 protected:
277 int HandleError(int error);
278 void PrintError(int error);
279 TABLE *table() const { return m_table; }
280
281 private:
283
285};
286
287#endif // SQL_ITERATORS_ROW_ITERATOR_H_
An iterator that switches between another iterator (typically a RefIterator or similar) and a TableSc...
Definition: ref_row_iterators.h:263
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:928
Profiling data for an iterator, needed by 'EXPLAIN ANALYZE'.
Definition: row_iterator.h:41
virtual uint64_t GetNumRows() const =0
The number of rows fetched.
virtual uint64_t GetNumInitCalls() const =0
The number of loops (i.e. the number of iterator->Init() calls).
virtual ~IteratorProfiler()=default
virtual double GetLastRowMs() const =0
Time (in ms) spent fetching the remaining rows.
virtual double GetFirstRowMs() const =0
Time (in ms) spent fetching the first row.
Definition: sql_optimizer.h:133
A context for reading through a single table using a chosen access method: index read,...
Definition: row_iterator.h:82
THD * thd() const
Definition: row_iterator.h:255
virtual bool DoInit()=0
uint64_t num_rows() const
Returns the number of times Read() has returned a row successfully from this iterator.
Definition: row_iterator.h:248
virtual const IteratorProfiler * GetProfiler() const
Get profiling data for this iterator (for 'EXPLAIN ANALYZE').
Definition: row_iterator.h:172
virtual void SetNullRowFlag(bool is_null_row)=0
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
virtual void StartPSIBatchMode()
Start performance schema batch mode, if supported (otherwise ignored).
Definition: row_iterator.h:222
virtual void SetOverrideProfiler(const IteratorProfiler *profiler)
Definition: row_iterator.h:182
bool Init()
Initialize or reinitialize the iterator.
Definition: row_iterator.h:105
virtual int DoRead()=0
uint64_t m_num_rows
Definition: row_iterator.h:263
uint64_t num_init_calls() const
Returns the number of times Init() has been called on this iterator.
Definition: row_iterator.h:244
virtual void UnlockRow()=0
RowIterator(const RowIterator &)=delete
uint64_t num_full_reads() const
Returns the number of times the iterator has been fully read.
Definition: row_iterator.h:252
virtual ~RowIterator()=default
virtual const RowIterator * real_iterator() const
Definition: row_iterator.h:241
RowIterator(RowIterator &&)=default
virtual void EndPSIBatchModeIfStarted()
Ends performance schema batch mode, if started.
Definition: row_iterator.h:233
RowIterator(THD *thd)
Definition: row_iterator.h:86
uint64_t m_num_init_calls
Definition: row_iterator.h:262
int Read()
Read a single row.
Definition: row_iterator.h:124
THD *const m_thd
Definition: row_iterator.h:261
virtual RowIterator * real_iterator()
If this iterator is wrapping a different iterator (e.g.
Definition: row_iterator.h:240
uint64_t m_num_full_reads
Definition: row_iterator.h:264
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Definition: row_iterator.h:267
void UnlockRow() override
The default implementation of unlock-row method of RowIterator, used in all access methods except EQR...
Definition: basic_row_iterators.cc:199
int HandleError(int error)
Definition: basic_row_iterators.cc:209
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: basic_row_iterators.cc:232
void PrintError(int error)
Definition: basic_row_iterators.cc:224
TABLE * table() const
Definition: row_iterator.h:279
TableRowIterator(THD *thd, TABLE *table)
Definition: row_iterator.h:269
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: basic_row_iterators.cc:228
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: basic_row_iterators.cc:201
TABLE *const m_table
Definition: row_iterator.h:282
Definition: table.h:1435