MySQL 8.4.0
Source Code Documentation
row_iterator.h
Go to the documentation of this file.
1#ifndef SQL_ITERATORS_ROW_ITERATOR_H_
2#define SQL_ITERATORS_ROW_ITERATOR_H_
3
4/* Copyright (c) 2018, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27#include <assert.h>
28#include <string>
29
30class Item;
31class JOIN;
32class THD;
33struct TABLE;
34
35/**
36 Profiling data for an iterator, needed by 'EXPLAIN ANALYZE'.
37 Note that an iterator may be iterated over multiple times, e.g. if it is
38 the inner operand of a nested loop join. This is denoted 'loops'
39 below, and the metrics in this class are aggregated values for all loops.
40*/
42 public:
43 /** Time (in ms) spent fetching the first row. (Sum for all loops.)*/
44 virtual double GetFirstRowMs() const = 0;
45
46 /** Time (in ms) spent fetching the remaining rows. (Sum for all loops.)*/
47 virtual double GetLastRowMs() const = 0;
48
49 /** The number of loops (i.e., the number of iterator->Init() calls).*/
50 virtual uint64_t GetNumInitCalls() const = 0;
51
52 /** The number of rows fetched. (Sum for all loops.)*/
53 virtual uint64_t GetNumRows() const = 0;
54 virtual ~IteratorProfiler() = default;
55};
56
57/**
58 A context for reading through a single table using a chosen access method:
59 index read, scan, etc., use of cache, etc. It is mostly meant as an interface,
60 but also contains some private member functions that are useful for many
61 implementations, such as error handling.
62
63 A RowIterator is a simple iterator; you initialize it, and then read one
64 record at a time until Read() returns EOF. A RowIterator can read from
65 other Iterators if you want to, e.g., SortingIterator, which takes in records
66 from another RowIterator and sorts them.
67
68 The abstraction is not completely tight. In particular, it still leaves some
69 specifics to TABLE, such as which columns to read (the read_set). This means
70 it would probably be hard as-is to e.g. sort a join of two tables.
71
72 Usage:
73@code
74 unique_ptr<RowIterator> iterator(new ...);
75 if (iterator->Init())
76 return true;
77 while (iterator->Read() == 0) {
78 ...
79 }
80@endcode
81 */
83 public:
84 // NOTE: Iterators should typically be instantiated using NewIterator,
85 // in sql/iterators/timing_iterator.h.
86 explicit RowIterator(THD *thd) : m_thd(thd) {}
87 virtual ~RowIterator() = default;
88
89 RowIterator(const RowIterator &) = delete;
90
91 // Default move ctor used by IndexRangeScanIterator.
92 RowIterator(RowIterator &&) = default;
93
94 /**
95 Initialize or reinitialize the iterator. You must always call Init()
96 before trying a Read() (but Init() does not imply Read()).
97
98 You can call Init() multiple times; subsequent calls will rewind the
99 iterator (or reposition it, depending on whether the iterator takes in
100 e.g. an Index_lookup) and allow you to read the records anew.
101 */
102 virtual bool Init() = 0;
103
104 /**
105 Read a single row. The row data is not actually returned from the function;
106 it is put in the table's (or tables', in case of a join) record buffer, i.e.,
107 table->records[0].
108
109 @retval
110 0 OK
111 @retval
112 -1 End of records
113 @retval
114 1 Error
115 */
116 virtual int Read() = 0;
117
118 /**
119 Mark the current row buffer as containing a NULL row or not, so that if you
120 read from it and the flag is true, you'll get only NULLs no matter what is
121 actually in the buffer (typically some old leftover row). This is used
122 for outer joins, when an iterator hasn't produced any rows and we need to
123 produce a NULL-complemented row. Init() or Read() won't necessarily
124 reset this flag, so if you ever set it to true, make sure to also set it
125 to false when needed.
126
127 Note that this can be called without Init() having been called first.
128 For example, NestedLoopIterator can hit EOF immediately on the outer
129 iterator, which means the inner iterator doesn't get an Init() call,
130 but will still forward SetNullRowFlag to both inner and outer iterators.
131
132 TODO: We shouldn't need this. See the comments on AggregateIterator for
133 a bit more discussion on abstracting out a row interface.
134 */
135 virtual void SetNullRowFlag(bool is_null_row) = 0;
136
137 // In certain queries, such as SELECT FOR UPDATE, UPDATE or DELETE queries,
138 // reading rows will automatically take locks on them. (This means that the
139 // set of locks taken will depend on whether e.g. the optimizer chose a table
140 // scan or used an index, due to InnoDB's row locking scheme with “gap locks”
141 // for B-trees instead of full predicate locks.)
142 //
143 // However, under some transaction isolation levels (READ COMMITTED or
144 // less strict), it is possible to release such locks if and only if the row
145 // failed a WHERE predicate, as only the returned rows are protected,
146 // not _which_ rows are returned. Thus, if Read() returned a row that you did
147 // not actually use, you should call UnlockRow() afterwards, which allows the
148 // storage engine to release the row lock in such situations.
149 //
150 // TableRowIterator has a default implementation of this; other iterators
151 // should usually either forward the call to their source iterator (if any)
152 // or just ignore it. The right behavior depends on the iterator.
153 virtual void UnlockRow() = 0;
154
155 /** Get profiling data for this iterator (for 'EXPLAIN ANALYZE').*/
156 virtual const IteratorProfiler *GetProfiler() const {
157 /**
158 Valid for TimingIterator, MaterializeIterator and
159 TemptableAggregateIterator only.
160 */
161 assert(false);
162 return nullptr;
163 }
164
165 /** @see TimingIterator .*/
166 virtual void SetOverrideProfiler([
167 [maybe_unused]] const IteratorProfiler *profiler) {
168 // Valid for TimingIterator only.
169 assert(false);
170 }
171
172 /**
173 Start performance schema batch mode, if supported (otherwise ignored).
174
175 PFS batch mode is a mitigation to reduce the overhead of performance schema,
176 typically applied at the innermost table of the entire join. If you start
177 it before scanning the table and then end it afterwards, the entire set
178 of handler calls will be timed only once, as a group, and the costs will
179 be distributed evenly out. This reduces timer overhead.
180
181 If you start PFS batch mode, you must also take care to end it at the
182 end of the scan, one way or the other. Do note that this is true even
183 if the query ends abruptly (LIMIT is reached, or an error happens).
184 The easiest workaround for this is to simply call EndPSIBatchModeIfStarted()
185 on the root iterator at the end of the scan. See the PFSBatchMode class for
186 a useful helper.
187
188 The rules for starting and ending batch mode are:
189
190 1. If you are an iterator with exactly one child (FilterIterator etc.),
191 forward any StartPSIBatchMode() calls to it.
192 2. If you drive an iterator (read rows from it using a for loop
193 or similar), use PFSBatchMode as described above.
194 3. If you have multiple children, ignore the call and do your own
195 handling of batch mode as appropriate. For materialization,
196 #2 would typically apply. For joins, it depends on the join type
197 (e.g., NestedLoopIterator applies batch mode only when scanning
198 the innermost table).
199
200 The upshot of this is that when scanning a single table, batch mode
201 will typically be activated for that table (since we call
202 StartPSIBatchMode() on the root iterator, and it will trickle all the way
203 down to the table iterator), but for a join, the call will be ignored
204 and the join iterator will activate batch mode by itself as needed.
205 */
206 virtual void StartPSIBatchMode() {}
207
208 /**
209 Ends performance schema batch mode, if started. It's always safe to
210 call this.
211
212 Iterators that have children (composite iterators) must forward the
213 EndPSIBatchModeIfStarted() call to every iterator they could conceivably
214 have called StartPSIBatchMode() on. This ensures that after such a call
215 on the root iterator, all handlers are out of batch mode.
216 */
217 virtual void EndPSIBatchModeIfStarted() {}
218
219 /**
220 If this iterator is wrapping a different iterator (e.g. TimingIterator<T>)
221 and you need to down_cast<> to a specific iterator type, this allows getting
222 at the wrapped iterator.
223 */
224 virtual RowIterator *real_iterator() { return this; }
225 virtual const RowIterator *real_iterator() const { return this; }
226
227 protected:
228 THD *thd() const { return m_thd; }
229
230 private:
231 THD *const m_thd;
232};
233
235 public:
237
238 void UnlockRow() override;
239 void SetNullRowFlag(bool is_null_row) override;
240 void StartPSIBatchMode() override;
241 void EndPSIBatchModeIfStarted() override;
242
243 protected:
244 int HandleError(int error);
245 void PrintError(int error);
246 TABLE *table() const { return m_table; }
247
248 private:
250
252};
253
254#endif // SQL_ITERATORS_ROW_ITERATOR_H_
An iterator that switches between another iterator (typically a RefIterator or similar) and a TableSc...
Definition: ref_row_iterators.h:263
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:934
Profiling data for an iterator, needed by 'EXPLAIN ANALYZE'.
Definition: row_iterator.h:41
virtual uint64_t GetNumRows() const =0
The number of rows fetched.
virtual uint64_t GetNumInitCalls() const =0
The number of loops (i.e., the number of iterator->Init() calls).
virtual ~IteratorProfiler()=default
virtual double GetLastRowMs() const =0
Time (in ms) spent fetching the remaining rows.
virtual double GetFirstRowMs() const =0
Time (in ms) spent fetching the first row.
Definition: sql_optimizer.h:133
A context for reading through a single table using a chosen access method: index read,...
Definition: row_iterator.h:82
THD * thd() const
Definition: row_iterator.h:228
virtual const IteratorProfiler * GetProfiler() const
Get profiling data for this iterator (for 'EXPLAIN ANALYZE').
Definition: row_iterator.h:156
virtual void SetNullRowFlag(bool is_null_row)=0
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
virtual void StartPSIBatchMode()
Start performance schema batch mode, if supported (otherwise ignored).
Definition: row_iterator.h:206
virtual void UnlockRow()=0
RowIterator(const RowIterator &)=delete
virtual ~RowIterator()=default
virtual const RowIterator * real_iterator() const
Definition: row_iterator.h:225
RowIterator(RowIterator &&)=default
virtual void EndPSIBatchModeIfStarted()
Ends performance schema batch mode, if started.
Definition: row_iterator.h:217
RowIterator(THD *thd)
Definition: row_iterator.h:86
THD *const m_thd
Definition: row_iterator.h:231
virtual RowIterator * real_iterator()
If this iterator is wrapping a different iterator (e.g.
Definition: row_iterator.h:224
virtual void SetOverrideProfiler([[maybe_unused]] const IteratorProfiler *profiler)
Definition: row_iterator.h:166
virtual int Read()=0
Read a single row.
virtual bool Init()=0
Initialize or reinitialize the iterator.
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Definition: row_iterator.h:234
void UnlockRow() override
The default implementation of unlock-row method of RowIterator, used in all access methods except EQR...
Definition: basic_row_iterators.cc:214
int HandleError(int error)
Definition: basic_row_iterators.cc:224
void EndPSIBatchModeIfStarted() override
Ends performance schema batch mode, if started.
Definition: basic_row_iterators.cc:247
void PrintError(int error)
Definition: basic_row_iterators.cc:239
TABLE * table() const
Definition: row_iterator.h:246
TableRowIterator(THD *thd, TABLE *table)
Definition: row_iterator.h:236
void StartPSIBatchMode() override
Start performance schema batch mode, if supported (otherwise ignored).
Definition: basic_row_iterators.cc:243
void SetNullRowFlag(bool is_null_row) override
Mark the current row buffer as containing a NULL row or not, so that if you read from it and the flag...
Definition: basic_row_iterators.cc:216
TABLE *const m_table
Definition: row_iterator.h:249
Definition: table.h:1405