MySQL 9.1.0
Source Code Documentation
cost_model.h
Go to the documentation of this file.
1/* Copyright (c) 2020, 2024, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24#ifndef SQL_JOIN_OPTIMIZER_COST_MODEL_H_
25#define SQL_JOIN_OPTIMIZER_COST_MODEL_H_
26
27#include <algorithm> // std::clamp
28
29#include "my_base.h"
30#include "my_bitmap.h" // bitmap_bits_set
31
35#include "sql/mem_root_array.h"
36#include "sql/table.h"
37
38struct AccessPath;
40class Item;
41class Query_block;
42class THD;
43
44/**
45 When we make cost estimates, we use this as the maximal length the
46 values we get from evaluating an Item (in bytes). Actual values of
47 e.g. blobs may be much longer, but even so we use this as an upper
48 limit when doing cost calculations. (For context, @see Item#max_length .)
49*/
50constexpr size_t kMaxItemLengthEstimate = 4096;
51
52/// A fallback cardinality estimate that is used in case the storage engine
53/// cannot provide one (like for table functions). It's a fairly arbitrary
54/// non-zero value.
56
57/// See EstimateFilterCost.
/// See EstimateFilterCost.
struct FilterCost {
  /// Cost of evaluating the filter for all rows if subqueries are not
  /// materialized. (Note that this includes the contribution from
  /// init_cost_if_not_materialized.)
  double cost_if_not_materialized{0.0};

  /// Initial cost before the filter can be applied for the first time.
  /// Typically the cost of executing 'independent subquery' in queries like:
  /// "SELECT * FROM tab WHERE field = <independent subquery>".
  /// (That corresponds to the Item_singlerow_subselect class.)
  double init_cost_if_not_materialized{0.0};

  /// Cost of evaluating the filter for all rows if all subqueries in
  /// it have been materialized beforehand. If there are no subqueries
  /// in the condition, equals cost_if_not_materialized.
  double cost_if_materialized{0.0};

  /// Cost of materializing all subqueries present in the filter.
  /// If there are no subqueries in the condition, equals zero.
  double cost_to_materialize{0.0};
};
79
80/// Used internally by EstimateFilterCost() only.
81void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows,
82 FilterCost *cost);
83
84/**
85 Estimate the cost of evaluating “condition”, “num_rows” times.
86 This is a fairly rudimentary estimation, _but_ it includes the cost
87 of any subqueries that may be present and that need evaluation.
88 */
89FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition,
90 const Query_block *outer_query_block);
91
92/**
93 A cheaper overload of EstimateFilterCost() that assumes that all
94 contained subqueries have already been extracted (ie., it skips the
95 walking, which can be fairly expensive). This data is typically
96 computed by FindContainedSubqueries().
97 */
99 THD *thd, double num_rows,
100 const Mem_root_array<ContainedSubquery> &contained_subqueries) {
101 FilterCost cost;
104
105 for (const ContainedSubquery &subquery : contained_subqueries) {
106 AddCost(thd, subquery, num_rows, &cost);
107 }
108 return cost;
109}
110
111/**
112 Estimate costs and output rows for a SORT AccessPath.
113 @param thd Current thread.
114 @param path the AccessPath.
115 @param distinct_rows An estimate of the number of distinct rows, if
116 remove_duplicates==true and we have an estimate already.
117*/
119 double distinct_rows = kUnknownRowCount);
120
122
123/**
124 Estimate the number of rows with a distinct combination of values for
125 'terms'. @see EstimateDistinctRowsFromStatistics for additional details.
126 @param thd The current thread.
127 @param child_rows The number of input rows.
128 @param terms The terms for which we estimate the number of unique
129 combinations.
130 @returns The estimated number of output rows.
131*/
132double EstimateDistinctRows(THD *thd, double child_rows,
134/**
135 Estimate costs and result row count for an aggregate operation.
136 @param[in,out] thd The current thread.
137 @param[in,out] path The AGGREGATE path.
138 @param[in] query_block The Query_block to which 'path' belongs.
139 */
141 const Query_block *query_block);
144
145/// Estimate the costs and row count for a STREAM AccessPath.
147
148/**
149 Estimate the costs and row count for a WINDOW AccessPath. As described in
150 @see AccessPath::m_init_cost, the cost to read k out of N rows would be
151 init_cost + (k/N) * (cost - init_cost).
152*/
154
155/// Estimate the costs and row count for a Temp table Aggregate AccessPath.
157 const Query_block *query_block);
158
159/// Estimate the costs and row count for a WINDOW AccessPath.
161
162/**
163 Estimate the fan out for a left semijoin or a left antijoin. The fan out
164 is defined as the number of result rows, divided by the number of input
165 rows from the left hand relation. For a semijoin, J1:
166
167 SELECT ... FROM t1 WHERE EXISTS (SELECT ... FROM t2 WHERE predicate)
168
169 we know that the fan out of the corresponding inner join J2:
170
171 SELECT ... FROM t1, t2 WHERE predicate
172
173 is: F(J2) = CARD(t2) * SELECTIVITY(predicate) , where CARD(t2)=right_rows,
174 and SELECTIVITY(predicate)=edge.selectivity. If 'predicate' is a
175 deterministic function of t1 and t2 rows, then J1 is equivalent to an inner
176 join J3:
177
178 SELECT ... FROM t1 JOIN (SELECT DISTINCT f1,..fn FROM t2) d ON predicate
179
180 where f1,..fn are those fields from t2 that appear in the predicate.
181
182 Then F(J1) = F(J3) = F(J2) * CARD(d) / CARD(t2)
183 = CARD(d) * SELECTIVITY(predicate).
184
185 This function therefore collects f1..fn and estimates CARD(d). As a special
186 case, 'predicate' may be independent of t2. The query is then equivalent to:
187
188 SELECT ... FROM t1 WHERE predicate AND (SELECT COUNT(*) FROM t2) > 0
189
190 The fan out is then the selectivity of 'predicate' multiplied by the
191 probability of t2 having at least one row.
192
193 @param thd The current thread.
194 @param right_rows The number of input rows from the right hand relation.
195 @param edge Join edge.
196 @returns fan out.
197 */
198double EstimateSemijoinFanOut(THD *thd, double right_rows,
199 const JoinPredicate &edge);
200
201/**
202 Estimate the number of output rows from joining two relations.
203 @param thd The current thread.
204 @param left_rows Number of rows in the left hand relation.
205 @param right_rows Number of rows in the right hand relation.
206 @param edge The join between the two relations.
207*/
208inline double FindOutputRowsForJoin(THD *thd, double left_rows,
209 double right_rows,
210 const JoinPredicate *edge) {
211 switch (edge->expr->type) {
213 // For outer joins, every outer row produces at least one row (if none
214 // are matching, we get a NULL-complemented row).
215 // Note that this can cause inconsistent row counts; see bug #33550360
216 // and/or JoinHypergraph::has_reordered_left_joins.
217 return left_rows * std::max(right_rows * edge->selectivity, 1.0);
218
220 return left_rows * EstimateSemijoinFanOut(thd, right_rows, *edge);
221
223 // Antijoin are estimated as simply the opposite of semijoin (see above),
224 // but wrongly estimating 0 rows (or, of course, a negative amount) could
225 // be really bad, so we assume at least 10% coming out as a fudge factor.
226 // It's better to estimate too high than too low here.
227 return left_rows *
228 std::max(1.0 - EstimateSemijoinFanOut(thd, right_rows, *edge),
229 0.1);
230
233 return left_rows * right_rows * edge->selectivity;
234
235 case RelationalExpression::FULL_OUTER_JOIN: // Not implemented.
236 case RelationalExpression::MULTI_INNER_JOIN: // Should not appear here.
237 case RelationalExpression::TABLE: // Should not appear here.
238 assert(false);
239 return 0;
240
241 default:
242 assert(false);
243 return 0;
244 }
245}
246
247/**
248 Determines whether a given key on a table is both clustered and primary.
249
250 @param table The table to which the index belongs.
251 @param key_idx The position of the key in table->key_info[].
252
253 @return True if the key is clustered and primary, false otherwise.
254*/
255inline bool IsClusteredPrimaryKey(const TABLE *table, unsigned key_idx) {
256 if (table->s->is_missing_primary_key()) return false;
257 return key_idx == table->s->primary_key &&
258 table->file->primary_key_is_clustered();
259}
260
261/// The minimum number of bytes to return for row length estimates. This is
262/// mostly to guard against returning estimates of zero, which may or may not
263/// actually be able to happen in practice.
264constexpr unsigned kMinEstimatedBytesPerRow = 8;
265
266/// The maximum number of bytes to return for row length estimates. The current
267/// value of this constant is set to cover a wide range of row sizes and should
268/// capture the most common row lengths in bytes. We place an upper limit on the
269/// estimates since we have performed our calibration within this range, and we
270/// would like to prevent cost estimates from running away in case the
271/// underlying statistics are off in some instances. In such cases we prefer
272/// capping the resulting estimate. As a reasonable upper limit we use the
273/// default InnoDB page size of 2^14 = 16384 bytes.
274constexpr unsigned kMaxEstimatedBytesPerRow = 16384;
275
276/**
277 Estimates the number of bytes that MySQL must process when reading a row from
278 a table, independently of the size of the read set.
279
280 @param table The table to produce an estimate for.
281
282 @returns The estimated number of bytes.
283
284 @note There are two different relevant concepts of bytes per row:
285
286 1. The bytes per row on the storage engine side.
287 2. The bytes per row on the server side.
288
289 One the storage engine side, for InnoDB at least, we compute
290 stats.mean_rec_length as the size of the data file divided by the number of
291 rows in the table.
292
293 On the server side we are interested in the size of the MySQL representation
294 in bytes. This could be a more accurate statistic when determining the CPU
295 cost of processing a row (i.e., it does not matter very much if InnoDB pages
296 are only half-full). As an approximation to the size of a row in bytes on the
297 server side we use the length of the record buffer. This does not accurately
298 represent the size of some variable length fields as we only store pointers to
299 such fields in the record buffer. So in a sense the record buffer length is an
300 estimate for the bytes per row but with an 8-byte cap on variable length
301 fields -- i.e. better than no estimate, and capped to ensure that costs do not
302 explode.
303
304 For now, we will use the record buffer length (share->rec_buff_length) since
305 it is more robust compared to the storage engine mean record length
306 (file->stats.mean_rec_length) in the following cases:
307
308 - When the table fits in a single page stats.mean_rec_length will tend to
309 overestimate the record length since it is computed as
310 stats.data_file_length / stats.records and the data file length is at least
311 a full page which defaults to 16384 bytes (for InnoDB at least).
312
313 - When the table contains records that are stored off-page it would seem that
314 stats->data_file_length includes the overflow pages which are not relevant
315 when estimating the height of the B-tree.
316
317 The downside of using this estimate is that we do not accurately account for
318 the presence of variable length fields that are stored in-page.
319*/
320inline unsigned EstimateBytesPerRowTable(const TABLE *table) {
321 return std::clamp(table->s->rec_buff_length, kMinEstimatedBytesPerRow,
323}
324
325/**
326 Estimates the number of bytes that MySQL must process when reading a row from
327 a secondary index, independently of the size of the read set.
328
329 @param table The table to which the index belongs.
330 @param key_idx The position of the key in table->key_info[].
331
332 @return The estimated number of bytes per row in the index.
333*/
334inline unsigned EstimateBytesPerRowIndex(const TABLE *table, unsigned key_idx) {
335 // key_length should correspond to the length of the field(s) of the key in
336 // bytes and ref_length is the length of the field(s) of the primary key in
337 // bytes. Secondary indexes (in InnoDB) contain a copy of the value of the
338 // primary key associated with a given row, in order to make it possible to
339 // retrieve the corresponding row from the primary index in case we use a
340 // non-covering index operation.
341 unsigned estimate =
342 table->key_info[key_idx].key_length + table->file->ref_length;
343 return std::clamp(estimate, kMinEstimatedBytesPerRow,
345}
346
347/**
348 Estimates the height of a B-tree index.
349
350 We estimate the height of the index to be the smallest positive integer h such
351 that table_records <= (1 + records_per_page)^h.
352
353 This function supports both clustered primary indexes and secondary indexes.
354 Secondary indexes will tend to have more records per page compared to primary
355 clustered indexes and as a consequence they will tend to be shorter.
356
357 @param table The table to which the index belongs.
358 @param key_idx The position of the key in table->key_info[].
359
360 @return The estimated height of the index.
361*/
362inline int IndexHeight(const TABLE *table, unsigned key_idx) {
363 // We clamp the block size to lie in the interval between the max and min
364 // allowed block size for InnoDB (2^12 to 2^16). Ideally we would have a
365 // guarantee that stats.block_size has a reasonable value (across all storage
366 // engines, types of tables, state of statistics), but in the absence of such
367 // a guarantee we clamp to the values for the InnoDB storage engine since the
368 // cost model has been calibrated for these values.
369 constexpr unsigned kMinEstimatedBlockSize = 4096;
370 constexpr unsigned kMaxEstimatedBlockSize = 65536;
371 unsigned block_size =
372 std::clamp(table->file->stats.block_size, kMinEstimatedBlockSize,
373 kMaxEstimatedBlockSize);
374 unsigned bytes_per_row = IsClusteredPrimaryKey(table, key_idx)
377
378 // Ideally we should always have that block_size >= bytes_per_row, but since
379 // the storage engine and MySQL row formats differ, this is not always the
380 // case. Therefore we manually ensure that records_per_page >= 1.0.
381 double records_per_page =
382 std::max(1.0, static_cast<double>(block_size) / bytes_per_row);
383
384 // Computing the height using a while loop instead of using std::log turns out
385 // to be about 5 times faster in microbenchmarks when the measurement is made
386 // using a somewhat realistic and representative set of values for the number
387 // of records per page and the number of records in the table. In the worst
388 // case, if the B-tree contains only a single record per page, the table would
389 // have to contain 2^30 pages (corresponding to more than 16 terabytes of
390 // data) for this loop to run 30 times. A B-tree with 1 billion records and
391 // 100 records per page only uses 4 iterations of the loop (the height is 5).
392 int height = 1;
393 double r = 1.0 + records_per_page;
394 while (r < table->file->stats.records) {
395 r = r * (1.0 + records_per_page);
396 height += 1;
397 }
398 return height;
399}
400
401/**
402 Computes the expected cost of reading a number of rows. The cost model takes
403 into account the number of fields that is being read from the row and the
404 width of the row in bytes. Both RowReadCostTable() and RowReadCostIndex() call
405 this function and thus use the same cost model.
406
407 @param num_rows The (expected) number of rows to read.
408 @param fields_read_per_row The number of fields to read per row.
409 @param bytes_per_row The total length of the row to be processed (including
410 fields that are not read) in bytes.
411
412 @returns The expected cost of reading num_rows.
413
414 @note It is important that this function be robust to fractional row
415 estimates. For example, if we index nested loop join two primary key columns
416 and the inner table is smaller than the outer table, we should see that
417 num_rows for the inner index lookup is less than one. In this case it is
418 important that we return the expected cost of the operation. For example, if
419 we only expect to read 0.1 rows the cost should be 0.1 of the cost of reading
420 one row (we are working with a linear cost model, so we only have to take the
421 expected number of rows into account, and not the complete distribution).
422*/
423inline double RowReadCost(double num_rows, double fields_read_per_row,
424 double bytes_per_row) {
425 return (kReadOneRowCost + kReadOneFieldCost * fields_read_per_row +
426 kReadOneByteCost * bytes_per_row) *
427 num_rows;
428}
429
430/**
431 Computes the cost of reading a number of rows from a table.
432 @see ReadRowCost() for further details.
433
434 @param table The table to read from.
435 @param num_rows The (expected) number of rows to read.
436
437 @returns The cost of reading num_rows.
438*/
439inline double RowReadCostTable(const TABLE *table, double num_rows) {
440 double fields_read_per_row = bitmap_bits_set(table->read_set);
441 double bytes_per_row = EstimateBytesPerRowTable(table);
442 return RowReadCost(num_rows, fields_read_per_row, bytes_per_row);
443}
444
445/**
446 Computes the cost of reading a number of rows from an index.
447 @see ReadRowCost() for further details.
448
449 @param table The table to which the index belongs.
450 @param key_idx The position of the key in table->key_info[].
451 @param num_rows The (expected) number of rows to read.
452
453 @returns The cost of reading num_rows.
454*/
455inline double RowReadCostIndex(const TABLE *table, unsigned key_idx,
456 double num_rows) {
457 if (IsClusteredPrimaryKey(table, key_idx)) {
458 return RowReadCostTable(table, num_rows);
459 }
460 // Assume we read two fields from the index record if it is not covering. The
461 // exact assumption here is not very important as the cost should be dominated
462 // by the additional lookup into the primary index.
463 constexpr double kDefaultFieldsReadFromCoveringIndex = 2;
464 double fields_read_per_row = table->covering_keys.is_set(key_idx)
465 ? bitmap_bits_set(table->read_set)
466 : kDefaultFieldsReadFromCoveringIndex;
467
468 double bytes_per_row = EstimateBytesPerRowIndex(table, key_idx);
469 return RowReadCost(num_rows, fields_read_per_row, bytes_per_row);
470}
471
472/**
473 Estimates the cost of a full table scan. Primarily used to assign a cost to
474 the TABLE_SCAN access path.
475
476 @param table The table to estimate cost for.
477
478 @returns The cost of scanning the table.
479*/
480inline double EstimateTableScanCost(const TABLE *table) {
481 return RowReadCostTable(table, table->file->stats.records);
482}
483
484/**
485 Estimates the cost of an index lookup.
486
487 @param table The table to which the index belongs.
488 @param key_idx The position of the key in table->key_info[].
489
490 @return The estimated cost of an index lookup.
491
492 @note The model "cost ~ index_height" works well when the Adaptive Hash Index
493 (AHI) is disabled. The AHI essentially works as a dynamic cache for the most
494 frequently accessed index pages that sits on top of the B-tree. With AHI
495 enabled the cost of random lookups does not appear to be predictable using
496 standard explanatory variables such as index height or the logarithm of the
497 number of rows in the index. The performance of AHI will also be dependent on
498 the access pattern, so it is fundamentally difficult to create an accurate
499 model. However, our calibration experiments reveal two things that hold true
500 both with and without AHI enabled:
501
502 1. Index lookups usually take 1-3 microseconds. The height of a B-tree grows
503 very slowly (proportional to log(N)/log(R) for tables with N rows and R
504 records per page), making it near-constant in the common case where tables
505 have many records per page, even without AHI.
506
507 2. Random lookups in large indexes tend to be slower. A random access pattern
508 will cause more cache misses, both for regular hardware caching and AHI. In
509 addition, for a larger B-tree only the top levels fit in cache and we will
510 invariably see a few cache misses with random accesses.
511
512 We model the cost of index lookups by interpolating between a model with
513 constant cost and a model that depends entirely on the height of the index.
514 The constants are based on calibration experiments with and without AHI.
515
516 Another factor that is important when calibrating the cost of index lookups is
517 whether we are interested in the average cost when performing many lookups
518 such as when performing an index nested loop join or scanning along a
519 secondary non-covering index and lookup into the primary index, or the cost of
520 a single lookup such as a point select that uses an index. From our
521 measurements we see that the average running time of an index lookup can
522 easily be a factor ~5x faster when performing thousands of successive lookups
523 compared to just one. This is likely due to hardware caching effects. Since
524 getting the cost right in the case when we perform many lookups is typically
525 more important, we have opted to calibrate costs based on operations that
526 perform many lookups.
527
528 For adding IO costs to this model (future work) we probably want to assume
529 that we only fetch a single page when performing an index lookup, as
530 everything but leaf pages will tend to be cached, at least when performing
531 many index lookups in a query plan, which is exactly the case where it is
532 important to get costs right.
533*/
534inline double IndexLookupCost(const TABLE *table, unsigned key_idx) {
535 assert(key_idx < table->s->keys);
536 double cost_with_ahi = kIndexLookupFixedCost;
537 double cost_without_ahi = kIndexLookupPageCost * IndexHeight(table, key_idx);
538 return 0.5 * (cost_with_ahi + cost_without_ahi);
539}
540
541/**
542 Estimates the cost of an index range scan.
543
544 The cost model for index range scans accounts for the index lookup cost as
545 well as the cost of reading rows. Both index scans and ref accesses can be
546 viewed as special cases of index range scans, so the cost functions for those
547 operations call this function under the hood.
548
549 @note In the future this function should be extended to account for IO cost.
550
551 @param table The table to which the index belongs.
552 @param key_idx The position of the key in table->key_info[].
553 @param num_ranges The number of ranges.
554 @param num_output_rows The estimated expected number of output rows.
555
556 @returns The estimated cost of the index range scan operation.
557*/
558inline double EstimateIndexRangeScanCost(const TABLE *table, unsigned key_idx,
559 double num_ranges,
560 double num_output_rows) {
561 double cost = num_ranges * IndexLookupCost(table, key_idx) +
562 RowReadCostIndex(table, key_idx, num_output_rows);
563
564 if (!IsClusteredPrimaryKey(table, key_idx) &&
565 !table->covering_keys.is_set(key_idx)) {
566 // If we are operating on a secondary non-covering index we have to perform
567 // a lookup into the primary index for each matching row. This is the case
568 // for the InnoDB storage engine, but with the MEMORY engine we do not have
569 // a primary key, so we instead assign a default lookup cost.
570 double lookup_cost = table->s->is_missing_primary_key()
572 : IndexLookupCost(table, table->s->primary_key);
573
574 // When this function is called by e.g. EstimateRefAccessCost() we can have
575 // num_output_rows < 1 and it becomes important that our cost estimate
576 // reflects expected cost, i.e. that it scales linearly with the expected
577 // number of output rows.
578 cost += num_output_rows * lookup_cost +
579 RowReadCostTable(table, num_output_rows);
580 }
581 return cost;
582}
583
584/**
585 Estimates the cost of an index scan. An index scan scans all rows in the
586 table along the supplied index.
587
588 @param table The table to which the index belongs.
589 @param key_idx The position of the key in table->key_info[].
590
591 @returns The estimated cost of the index scan.
592*/
593inline double EstimateIndexScanCost(const TABLE *table, unsigned key_idx) {
594 return EstimateIndexRangeScanCost(table, key_idx, 1.0,
595 table->file->stats.records);
596}
597
598/**
599 Estimates the cost of an index lookup (ref access).
600
601 @param table The table to which the index belongs.
602 @param key_idx The position of the key in table->key_info[].
603 @param num_output_rows The estimated number of output rows.
604
605 @returns The estimated cost of the index scan.
606*/
607inline double EstimateRefAccessCost(const TABLE *table, unsigned key_idx,
608 double num_output_rows) {
609 // We want the optimizer to prefer ref acccesses to range scans when they both
610 // have the same cost. This is particularly important for the EQ_REF access
611 // path (index lookups with at most one matching row) since the EQ_REF
612 // iterator uses caching to improve performance.
613 constexpr double kRefAccessCostDiscount = 0.05;
614 return (1.0 - kRefAccessCostDiscount) *
615 EstimateIndexRangeScanCost(table, key_idx, 1.0, num_output_rows);
616}
617
618#endif // SQL_JOIN_OPTIMIZER_COST_MODEL_H_
constexpr double kUnknownRowCount
To indicate that a row estimate is not yet made.
Definition: access_path.h:185
A wrapper class which provides array bounds checking.
Definition: sql_array.h:47
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:930
A typesafe replacement for DYNAMIC_ARRAY.
Definition: mem_root_array.h:426
This class represents a query block, aka a query specification, which is a query consisting of a SELE...
Definition: sql_lex.h:1159
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
Hypergraph optimizer cost constants.
constexpr double kReadOneFieldCost
Cost of per field in the read set.
Definition: cost_constants.h:85
constexpr double kApplyOneFilterCost
Cost of evaluating one filter on one row.
Definition: cost_constants.h:117
constexpr double kReadOneRowCost
Fixed cost of reading a row from the storage engine into the record buffer.
Definition: cost_constants.h:81
constexpr double kIndexLookupPageCost
The cost per page that is visited when performing an index lookup in an InnoDB B-tree.
Definition: cost_constants.h:129
constexpr double kIndexLookupFixedCost
Fixed cost of an index lookup when AHI is enabled (default).
Definition: cost_constants.h:132
constexpr double kReadOneByteCost
Overhead per byte when reading a row.
Definition: cost_constants.h:95
constexpr double kIndexLookupDefaultCost
Default cost of an index lookup when we are missing information to compute a more accurate cost estim...
Definition: cost_constants.h:138
double RowReadCost(double num_rows, double fields_read_per_row, double bytes_per_row)
Computes the expected cost of reading a number of rows.
Definition: cost_model.h:423
constexpr ha_rows kRowEstimateFallback
A fallback cardinality estimate that is used in case the storage engine cannot provide one (like for ...
Definition: cost_model.h:55
double EstimateRefAccessCost(const TABLE *table, unsigned key_idx, double num_output_rows)
Estimates the cost of an index lookup (ref access).
Definition: cost_model.h:607
void EstimateSortCost(THD *thd, AccessPath *path, double distinct_rows=kUnknownRowCount)
Estimate costs and output rows for a SORT AccessPath.
Definition: cost_model.cc:69
void EstimateTemptableAggregateCost(THD *thd, AccessPath *path, const Query_block *query_block)
Estimate the costs and row count for a Temp table Aggregate AccessPath.
Definition: cost_model.cc:975
void EstimateWindowCost(AccessPath *path)
Estimate the costs and row count for a WINDOW AccessPath.
Definition: cost_model.cc:1043
bool IsClusteredPrimaryKey(const TABLE *table, unsigned key_idx)
Determines whether a given key on a table is both clustered and primary.
Definition: cost_model.h:255
double EstimateTableScanCost(const TABLE *table)
Estimates the cost of a full table scan.
Definition: cost_model.h:480
constexpr size_t kMaxItemLengthEstimate
When we make cost estimates, we use this as the maximal length the values we get from evaluating an I...
Definition: cost_model.h:50
void AddCost(THD *thd, const ContainedSubquery &subquery, double num_rows, FilterCost *cost)
Used internally by EstimateFilterCost() only.
Definition: cost_model.cc:126
double EstimateIndexScanCost(const TABLE *table, unsigned key_idx)
Estimates the cost of an index scan.
Definition: cost_model.h:593
void EstimateLimitOffsetCost(AccessPath *path)
Estimate the costs and row count for a WINDOW AccessPath.
Definition: cost_model.cc:941
double RowReadCostIndex(const TABLE *table, unsigned key_idx, double num_rows)
Computes the cost of reading a number of rows from an index.
Definition: cost_model.h:455
void EstimateDeleteRowsCost(AccessPath *path)
Definition: cost_model.cc:891
constexpr unsigned kMinEstimatedBytesPerRow
The minimum number of bytes to return for row length estimates.
Definition: cost_model.h:264
int IndexHeight(const TABLE *table, unsigned key_idx)
Estimates the height of a B-tree index.
Definition: cost_model.h:362
void EstimateStreamCost(AccessPath *path)
Estimate the costs and row count for a STREAM AccessPath.
Definition: cost_model.cc:925
double FindOutputRowsForJoin(THD *thd, double left_rows, double right_rows, const JoinPredicate *edge)
Estimate the number of output rows from joining two relations.
Definition: cost_model.h:208
void EstimateUpdateRowsCost(AccessPath *path)
Definition: cost_model.cc:908
void EstimateMaterializeCost(THD *thd, AccessPath *path)
Definition: cost_model.cc:182
double IndexLookupCost(const TABLE *table, unsigned key_idx)
Estimates the cost of an index lookup.
Definition: cost_model.h:534
double EstimateIndexRangeScanCost(const TABLE *table, unsigned key_idx, double num_ranges, double num_output_rows)
Estimates the cost of an index range scan.
Definition: cost_model.h:558
unsigned EstimateBytesPerRowTable(const TABLE *table)
Estimates the number of bytes that MySQL must process when reading a row from a table,...
Definition: cost_model.h:320
double EstimateDistinctRows(THD *thd, double child_rows, Bounds_checked_array< const Item *const > terms)
Estimate the number of rows with a distinct combination of values for 'terms'.
void EstimateAggregateCost(THD *thd, AccessPath *path, const Query_block *query_block)
Estimate costs and result row count for an aggregate operation.
Definition: cost_model.cc:872
unsigned EstimateBytesPerRowIndex(const TABLE *table, unsigned key_idx)
Estimates the number of bytes that MySQL must process when reading a row from a secondary index,...
Definition: cost_model.h:334
FilterCost EstimateFilterCost(THD *thd, double num_rows, Item *condition, const Query_block *outer_query_block)
Estimate the cost of evaluating “condition”, “num_rows” times.
Definition: cost_model.cc:167
constexpr unsigned kMaxEstimatedBytesPerRow
The maximum number of bytes to return for row length estimates.
Definition: cost_model.h:274
double EstimateSemijoinFanOut(THD *thd, double right_rows, const JoinPredicate &edge)
Estimate the fan out for a left semijoin or a left antijoin.
Definition: cost_model.cc:1053
double RowReadCostTable(const TABLE *table, double num_rows)
Computes the cost of reading a number of rows from a table.
Definition: cost_model.h:439
This file includes constants used by all storage engines.
my_off_t ha_rows
Definition: my_base.h:1141
uint bitmap_bits_set(const MY_BITMAP *map)
Definition: my_bitmap.cc:416
static char * path
Definition: mysqldump.cc:149
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
Definition: os0file.h:89
T clamp(U x)
Definition: ut0ut.h:415
const mysql_service_registry_t * r
Definition: pfs_example_plugin_employee.cc:86
Access paths are a query planning structure that correspond 1:1 to iterators, in that an access path ...
Definition: access_path.h:227
This class represents a subquery contained in some subclass of Item_subselect,.
Definition: item.h:862
See EstimateFilterCost.
Definition: cost_model.h:58
double init_cost_if_not_materialized
Initial cost before the filter can be applied for the first time.
Definition: cost_model.h:68
double cost_to_materialize
Cost of materializing all subqueries present in the filter.
Definition: cost_model.h:77
double cost_if_materialized
Cost of evaluating the filter for all rows if all subqueries in it have been materialized beforehand.
Definition: cost_model.h:73
double cost_if_not_materialized
Cost of evaluating the filter for all rows if subqueries are not materialized.
Definition: cost_model.h:62
A specification that two specific relational expressions (e.g., two tables, or a table and a join bet...
Definition: access_path.h:79
RelationalExpression * expr
Definition: access_path.h:80
double selectivity
Definition: access_path.h:81
enum RelationalExpression::Type type
@ SEMIJOIN
Left semijoin.
Definition: relational_expression.h:153
@ MULTI_INNER_JOIN
Definition: relational_expression.h:175
@ STRAIGHT_INNER_JOIN
Definition: relational_expression.h:161
@ ANTIJOIN
Left antijoin.
Definition: relational_expression.h:156
@ INNER_JOIN
Definition: relational_expression.h:149
@ FULL_OUTER_JOIN
Definition: relational_expression.h:168
@ TABLE
Definition: relational_expression.h:177
@ LEFT_JOIN
Definition: relational_expression.h:150
Definition: table.h:1421