8.14 Example

Here we take an example from the test suite.

#
# Tracing of ORDER BY & GROUP BY simplification.
#
SET OPTIMIZER_TRACE="enabled=on",END_MARKERS_IN_JSON=on; # be readable
SET OPTIMIZER_TRACE_MAX_MEM_SIZE=1000000; # avoid small default
CREATE TABLE t1 (
pk INT, col_int_key INT,
col_varchar_key VARCHAR(1), col_varchar_nokey VARCHAR(1)
);
INSERT INTO t1 VALUES
(10,7,'v','v'),(11,0,'s','s'),(12,9,'l','l'),(13,3,'y','y'),(14,4,'c','c'),
(15,2,'i','i'),(16,5,'h','h'),(17,3,'q','q'),(18,1,'a','a'),(19,3,'v','v'),
(20,6,'u','u'),(21,7,'s','s'),(22,5,'y','y'),(23,1,'z','z'),(24,204,'h','h'),
(25,224,'p','p'),(26,9,'e','e'),(27,5,'i','i'),(28,0,'y','y'),(29,3,'w','w');
CREATE TABLE t2 (
pk INT, col_int_key INT,
col_varchar_key VARCHAR(1), col_varchar_nokey VARCHAR(1),
PRIMARY KEY (pk)
);
INSERT INTO t2 VALUES
(1,4,'b','b'),(2,8,'y','y'),(3,0,'p','p'),(4,0,'f','f'),(5,0,'p','p'),
(6,7,'d','d'),(7,7,'f','f'),(8,5,'j','j'),(9,3,'e','e'),(10,188,'u','u'),
(11,4,'v','v'),(12,9,'u','u'),(13,6,'i','i'),(14,1,'x','x'),(15,5,'l','l'),
(16,6,'q','q'),(17,2,'n','n'),(18,4,'r','r'),(19,231,'c','c'),(20,4,'h','h'),
(21,3,'k','k'),(22,3,'t','t'),(23,7,'t','t'),(24,6,'k','k'),(25,7,'g','g'),
(26,9,'z','z'),(27,4,'n','n'),(28,4,'j','j'),(29,2,'l','l'),(30,1,'d','d'),
(31,2,'t','t'),(32,194,'y','y'),(33,2,'i','i'),(34,3,'j','j'),(35,8,'r','r'),
(36,4,'b','b'),(37,9,'o','o'),(38,4,'k','k'),(39,5,'a','a'),(40,5,'f','f'),
(41,9,'t','t'),(42,3,'c','c'),(43,8,'c','c'),(44,0,'r','r'),(45,98,'k','k'),
(46,3,'l','l'),(47,1,'o','o'),(48,0,'t','t'),(49,189,'v','v'),(50,8,'x','x'),
(51,3,'j','j'),(52,3,'x','x'),(53,9,'k','k'),(54,6,'o','o'),(55,8,'z','z'),
(56,3,'n','n'),(57,9,'c','c'),(58,5,'d','d'),(59,9,'s','s'),(60,2,'j','j'),
(61,2,'w','w'),(62,5,'f','f'),(63,8,'p','p'),(64,6,'o','o'),(65,9,'f','f'),
(66,0,'x','x'),(67,3,'q','q'),(68,6,'g','g'),(69,5,'x','x'),(70,8,'p','p'),
(71,2,'q','q'),(72,120,'q','q'),(73,25,'v','v'),(74,1,'g','g'),(75,3,'l','l'),
(76,1,'w','w'),(77,3,'h','h'),(78,153,'c','c'),(79,5,'o','o'),(80,9,'o','o'),
(81,1,'v','v'),(82,8,'y','y'),(83,7,'d','d'),(84,6,'p','p'),(85,2,'z','z'),
(86,4,'t','t'),(87,7,'b','b'),(88,3,'y','y'),(89,8,'k','k'),(90,4,'c','c'),
(91,6,'z','z'),(92,1,'t','t'),(93,7,'o','o'),(94,1,'u','u'),(95,0,'t','t'),
(96,2,'k','k'),(97,7,'u','u'),(98,2,'b','b'),(99,1,'m','m'),(100,5,'o','o');
SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
STRAIGHT_JOIN t2 AS alias2 ON alias2.pk = alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk ;
SUM(alias2.col_varchar_nokey)	field2
0	1
0	2
0	3
0	4
0	5
0	6
0	7
0	9

Now we look at the trace:

SELECT * FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
QUERY	TRACE	MISSING_BYTES_BEYOND_MAX_MEM_SIZE	INSUFFICIENT_PRIVILEGES
SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
STRAIGHT_JOIN t2 AS alias2 ON alias2.pk = alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk	{

This was the first column: it repeats the query (this is a useful mark when several traces are remembered thanks to optimizer_trace_offset and optimizer_trace_limit). Now the trace. The statement's execution is naturally made of "steps":

"steps": [
  {
    "join_preparation": { 

This is a join's preparation

 "select#": 1, 

for the first SELECT of the statement (which has only one, here). Here are steps of the join's preparation:

"steps": [
  {
    "expanded_query": "/* select#1 */ select \
       sum(`test`.`alias2`.`col_varchar_nokey`) AS \ 
       `SUM(alias2.col_varchar_nokey)`,`test`.`alias2`.`pk` AS `field2` \
       from (`test`.`t1` `alias1` straight_join `test`.`t2` `alias2` \ 
       on((`test`.`alias2`.`pk` = `test`.`alias1`.`col_int_key`))) \
       where `test`.`alias1`.`pk` \
       group by `test`.`alias2`.`pk` \
       order by `test`.`alias1`.`col_int_key`,`test`.`alias2`.`pk`"
   }

Above is the query as it is in the join's preparation: fields have been resolved to their database and table, and each SELECT is annotated with its number (useful with subqueries).

] /* steps */
       } /* join_preparation */
     },
     {

Off to optimization:

"join_optimization": {
         "select#": 1,
         "steps": [
           {
             "condition_processing": {
               "condition": "WHERE",
               "original_condition": "(`test`.`alias1`.`pk` and \
               (`test`.`alias2`.`pk` = `test`.`alias1`.`col_int_key`))",
               "steps": [
                 {
                   "transformation": "equality_propagation",
                   "resulting_condition": "(`test`.`alias1`.`pk` and \
                   multiple equal(`test`.`alias2`.`pk`, \
                   `test`.`alias1`.`col_int_key`))"
                 },
                 {
                   "transformation": "constant_propagation",
                   "resulting_condition": "(`test`.`alias1`.`pk` and \
                   multiple equal(`test`.`alias2`.`pk`, \
                   `test`.`alias1`.`col_int_key`))"
                 },
                 {
                   "transformation": "trivial_condition_removal",
                   "resulting_condition": "(`test`.`alias1`.`pk` and \
                   multiple equal(`test`.`alias2`.`pk`, \
                   `test`.`alias1`.`col_int_key`))"

Not much happened in equality propagation above.

                 }
               ] /* steps */
             } /* condition_processing */
           },
           {
             "ref_optimizer_key_uses": [
               {
                 "database": "test",
                 "table": "alias2",
                 "field": "pk",
                 "equals": "`test`.`alias1`.`col_int_key`",
                 "null_rejecting": true

A possible ref access has been identified, and it is NULL-rejecting: any NULL value in `test`.`alias1`.`col_int_key` cannot have a match (it could have a match if the operator were <=>).

                }
             ] /* ref_optimizer_key_uses */
           },
           {

Now for every table in the query we estimate the cost of, and number of records returned by, a table scan, a range access,

"records_estimation": [
               {
                 "database": "test",
                 "table": "alias1",
                 "const_keys_added": {
                   "keys": [
                   ] /* keys */,
                   "cause": "group_by"
                 } /* const_keys_added */,
                 "range_analysis": {
                   "table_scan": {
                     "records": 20,
                     "cost": 8.1977
                   } /* table_scan */
                 } /* range_analysis */
               },
               {
                 "database": "test",
                 "table": "alias2",
                 "const_keys_added": {
                   "keys": [
                     "PRIMARY"
                   ] /* keys */,
                   "cause": "group_by"
                 } /* const_keys_added */,
                 "range_analysis": {
                   "table_scan": {
                     "records": 100,
                     "cost": 24.588
                   } /* table_scan */,
                   "potential_range_indices": [
                     {
                       "index": "PRIMARY",
                       "usable": true,
                       "key_parts": [
                         "pk"
                       ] /* key_parts */
                     }
                   ] /* potential_range_indices */,
                   "setup_range_conditions": [
                   ] /* setup_range_conditions */,
                   "group_index_range": {
                     "chosen": false,

Not possible to use GROUP_MIN_MAX because it can handle only one table, and we have two in the join:

 "cause": "not_single_table" 
  } /* group_index_range */ 

No range access is possible.

} /* range_analysis */
               }
             ] /* records_estimation */
           },
           {

Finding an optimal order for tables (greedy search); actually as this is STRAIGHT_JOIN only the requested order is explored, and access methods are selected:

"considered_execution_plans": [
               {
                 "database": "test",
                 "table": "alias1",
                 "best_access_path": {
                   "considered_access_paths": [
                     {
                       "access_type": "scan",
                       "records": 20,
                       "cost": 2.0977,
                       "chosen": true

Table scan be chosen!

                     }
                   ] /* considered_access_paths */
                 } /* best_access_path */,
                 "cost_for_plan": 6.0977,
                 "records_for_plan": 20,

We estimate that reading the first table, and applying any conditions to it, will yield 20 rows.

                 "rest_of_plan": [
                   {
                     "database": "test",
                     "table": "alias2",
                     "best_access_path": {
                       "considered_access_paths": [
                         {
                           "access_type": "ref",
                           "index": "PRIMARY",
                           "records": 1,
                           "cost": 20.2,
                           "chosen": true

We choose ref access on the primary key of alias2.

                         },
                         {
                           "access_type": "scan",
                           "using_join_cache": true,
                           "records": 75,
                           "cost": 7.4917,
                           "chosen": false

But not table scan, because its amount of records (75) is far greater than that of ref access (1).

                         }
                       ] /* considered_access_paths */
                     } /* best_access_path */,
                     "cost_for_plan": 30.098,
                     "records_for_plan": 20,
                     "chosen": true
                   }
                 ] /* rest_of_plan */
               }
             ] /* considered_execution_plans */
           },
           {

Now that the order of tables is fixed, we can split the WHERE condition in chunks which can be tested early ("push down of conditions down the join tree"):

"attaching_conditions_to_tables": {
               "original_condition": "((`test`.`alias2`.`pk` = \
               `test`.`alias1`.`col_int_key`) and `test`.`alias1`.`pk`)",
               "attached_conditions_computation": [
               ] /* attached_conditions_computation */,
               "attached_conditions_summary": [
                 {
                   "database": "test",
                   "table": "alias1",
                   "attached": "(`test`.`alias1`.`pk` and \
                   (`test`.`alias1`.`col_int_key` is not null))"

This condition above can be tested on rows of alias1 without even reading rows of alias2.

                 },
                 {
                   "database": "test",
                   "table": "alias2",
                   "attached": null
                 }
               ] /* attached_conditions_summary */
             } /* attaching_conditions_to_tables */
           },
           {

Now we try to simplify ORDER BY:

              "clause_processing": {
               "clause": "ORDER BY",
               "original_clause": "`test`.`alias1`.`col_int_key`,`test`.`alias2`.`pk`",
               "items": [
                 {
                   "item": "`test`.`alias1`.`col_int_key`"
                 },
                 {
                   "item": "`test`.`alias2`.`pk`",
                   "eq_ref_to_preceding_items": true

Because the WHERE clause contains `alias2`.`pk`=`alias1`.`col_int_key`, ordering by both columns is a waste: can just order by the first column, the second will always be equal to it.

                 }
               ] /* items */,
               "resulting_clause_is_simple": true,
               "resulting_clause": "`test`.`alias1`.`col_int_key`"

So we get a shorter ORDER BY clause - and this is not visible in EXPLAIN or EXPLAIN EXTENDED!! This simplification can be worth it: this shorter clause, being single-column and single-table, could be implemented by an index scan...

              } /* clause_processing */
           },
           {
             "clause_processing": {
               "clause": "GROUP BY",
               "original_clause": "`test`.`alias2`.`pk`",
               "items": [
                 {
                   "item": "`test`.`alias2`.`pk`"
                 }
               ] /* items */,
               "resulting_clause_is_simple": false,
               "resulting_clause": "`test`.`alias2`.`pk`"
             } /* clause_processing */
           },
           {
             "refine_plan": [
               {
                 "database": "test",
                 "table": "alias1",
                 "scan_type": "table"
               },
               {
                 "database": "test",
                 "table": "alias2"
               }
             ] /* refine_plan */
           }
         ] /* steps */
       } /* join_optimization */
     },
     {

Now we execute the join, and nothing interesting happens here:

       "join_execution": {
         "select#": 1,
         "steps": [
         ] /* steps */
       } /* join_execution */
     }
   ] /* steps */
 }	0	0

This was just an example. All traces have the same basic structure, but if a statement uses subqueries, there are several join preparations/optimizations/executions, subquery-specific transformations not shown here...


User Comments
Sign Up Login You must be logged in to post a comment.