Here we take an example from the test suite.
#
# Tracing of ORDER BY & GROUP BY simplification.
#
SET OPTIMIZER_TRACE="enabled=on",END_MARKERS_IN_JSON=on; # be readable
SET OPTIMIZER_TRACE_MAX_MEM_SIZE=1000000; # avoid small default
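By default only the trace of the last traced statement is remembered; to keep several traces around, two other session variables can be adjusted as well. A sketch, not part of the original test:
SET OPTIMIZER_TRACE_OFFSET=-2, OPTIMIZER_TRACE_LIMIT=2; # remember the last two traces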
CREATE TABLE t1 (
pk INT, col_int_key INT,
col_varchar_key VARCHAR(1), col_varchar_nokey VARCHAR(1)
);
INSERT INTO t1 VALUES
(10,7,'v','v'),(11,0,'s','s'),(12,9,'l','l'),(13,3,'y','y'),(14,4,'c','c'),
(15,2,'i','i'),(16,5,'h','h'),(17,3,'q','q'),(18,1,'a','a'),(19,3,'v','v'),
(20,6,'u','u'),(21,7,'s','s'),(22,5,'y','y'),(23,1,'z','z'),(24,204,'h','h'),
(25,224,'p','p'),(26,9,'e','e'),(27,5,'i','i'),(28,0,'y','y'),(29,3,'w','w');
CREATE TABLE t2 (
pk INT, col_int_key INT,
col_varchar_key VARCHAR(1), col_varchar_nokey VARCHAR(1),
PRIMARY KEY (pk)
);
INSERT INTO t2 VALUES
(1,4,'b','b'),(2,8,'y','y'),(3,0,'p','p'),(4,0,'f','f'),(5,0,'p','p'),
(6,7,'d','d'),(7,7,'f','f'),(8,5,'j','j'),(9,3,'e','e'),(10,188,'u','u'),
(11,4,'v','v'),(12,9,'u','u'),(13,6,'i','i'),(14,1,'x','x'),(15,5,'l','l'),
(16,6,'q','q'),(17,2,'n','n'),(18,4,'r','r'),(19,231,'c','c'),(20,4,'h','h'),
(21,3,'k','k'),(22,3,'t','t'),(23,7,'t','t'),(24,6,'k','k'),(25,7,'g','g'),
(26,9,'z','z'),(27,4,'n','n'),(28,4,'j','j'),(29,2,'l','l'),(30,1,'d','d'),
(31,2,'t','t'),(32,194,'y','y'),(33,2,'i','i'),(34,3,'j','j'),(35,8,'r','r'),
(36,4,'b','b'),(37,9,'o','o'),(38,4,'k','k'),(39,5,'a','a'),(40,5,'f','f'),
(41,9,'t','t'),(42,3,'c','c'),(43,8,'c','c'),(44,0,'r','r'),(45,98,'k','k'),
(46,3,'l','l'),(47,1,'o','o'),(48,0,'t','t'),(49,189,'v','v'),(50,8,'x','x'),
(51,3,'j','j'),(52,3,'x','x'),(53,9,'k','k'),(54,6,'o','o'),(55,8,'z','z'),
(56,3,'n','n'),(57,9,'c','c'),(58,5,'d','d'),(59,9,'s','s'),(60,2,'j','j'),
(61,2,'w','w'),(62,5,'f','f'),(63,8,'p','p'),(64,6,'o','o'),(65,9,'f','f'),
(66,0,'x','x'),(67,3,'q','q'),(68,6,'g','g'),(69,5,'x','x'),(70,8,'p','p'),
(71,2,'q','q'),(72,120,'q','q'),(73,25,'v','v'),(74,1,'g','g'),(75,3,'l','l'),
(76,1,'w','w'),(77,3,'h','h'),(78,153,'c','c'),(79,5,'o','o'),(80,9,'o','o'),
(81,1,'v','v'),(82,8,'y','y'),(83,7,'d','d'),(84,6,'p','p'),(85,2,'z','z'),
(86,4,'t','t'),(87,7,'b','b'),(88,3,'y','y'),(89,8,'k','k'),(90,4,'c','c'),
(91,6,'z','z'),(92,1,'t','t'),(93,7,'o','o'),(94,1,'u','u'),(95,0,'t','t'),
(96,2,'k','k'),(97,7,'u','u'),(98,2,'b','b'),(99,1,'m','m'),(100,5,'o','o');
SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
STRAIGHT_JOIN t2 AS alias2 ON alias2.pk = alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk ;
SUM(alias2.col_varchar_nokey) field2
0 1
0 2
0 3
0 4
0 5
0 6
0 7
0 9
Now we look at the trace:
SELECT * FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
QUERY TRACE MISSING_BYTES_BEYOND_MAX_MEM_SIZE INSUFFICIENT_PRIVILEGES
SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
STRAIGHT_JOIN t2 AS alias2 ON alias2.pk = alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk {
The first column repeats the query; this is a useful marker when several traces are remembered, thanks to optimizer_trace_offset and optimizer_trace_limit (as sketched after the SET statements above). Then comes the trace itself. The statement's execution is naturally made of "steps":
"steps": [
{
"join_preparation": {
This is a join's preparation
"select#": 1,
for the first SELECT of the statement (this statement has only one). Here are the steps of the join's preparation:
"steps": [
{
"expanded_query": "/* select#1 */ select \
sum(`test`.`alias2`.`col_varchar_nokey`) AS \
`SUM(alias2.col_varchar_nokey)`,`test`.`alias2`.`pk` AS `field2` \
from (`test`.`t1` `alias1` straight_join `test`.`t2` `alias2` \
on((`test`.`alias2`.`pk` = `test`.`alias1`.`col_int_key`))) \
where `test`.`alias1`.`pk` \
group by `test`.`alias2`.`pk` \
order by `test`.`alias1`.`col_int_key`,`test`.`alias2`.`pk`"
}
Above is the query as it stands at this point of the join's preparation: fields have been resolved to their database and table, and each SELECT is annotated with its number (useful when there are subqueries).
] /* steps */
} /* join_preparation */
},
{
Off to optimization:
"join_optimization": {
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "(`test`.`alias1`.`pk` and \
(`test`.`alias2`.`pk` = `test`.`alias1`.`col_int_key`))",
"steps": [
{
"transformation": "equality_propagation",
"resulting_condition": "(`test`.`alias1`.`pk` and \
multiple equal(`test`.`alias2`.`pk`, \
`test`.`alias1`.`col_int_key`))"
},
{
"transformation": "constant_propagation",
"resulting_condition": "(`test`.`alias1`.`pk` and \
multiple equal(`test`.`alias2`.`pk`, \
`test`.`alias1`.`col_int_key`))"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "(`test`.`alias1`.`pk` and \
multiple equal(`test`.`alias2`.`pk`, \
`test`.`alias1`.`col_int_key`))"
Not much happened in the three transformations above: equality propagation merely rewrote the equality as a multiple-equality, and the two following transformations changed nothing.
}
] /* steps */
} /* condition_processing */
},
{
"ref_optimizer_key_uses": [
{
"database": "test",
"table": "alias2",
"field": "pk",
"equals": "`test`.`alias1`.`col_int_key`",
"null_rejecting": true
A possible ref access has been identified, and it is NULL-rejecting: a row with a NULL value in `test`.`alias1`.`col_int_key` cannot have a match (it could have one if the operator were the null-safe <=>; a hypothetical variant is sketched after this step).
}
] /* ref_optimizer_key_uses */
},
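As an aside, a hypothetical variant of the join condition using the null-safe operator (not part of the test above) would look like this:
SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
STRAIGHT_JOIN t2 AS alias2 ON alias2.pk <=> alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk ;
Since <=> treats NULL as equal to NULL, the optimizer could no longer assume that NULL probe values are unmatchable, so the null_rejecting flag would presumably not be set for such a ref access.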
{
Now, for every table in the query, we estimate the cost of, and the number of records returned by, a table scan and possible range accesses:
"records_estimation": [
{
"database": "test",
"table": "alias1",
"const_keys_added": {
"keys": [
] /* keys */,
"cause": "group_by"
} /* const_keys_added */,
"range_analysis": {
"table_scan": {
"records": 20,
"cost": 8.1977
} /* table_scan */
} /* range_analysis */
},
{
"database": "test",
"table": "alias2",
"const_keys_added": {
"keys": [
"PRIMARY"
] /* keys */,
"cause": "group_by"
} /* const_keys_added */,
"range_analysis": {
"table_scan": {
"records": 100,
"cost": 24.588
} /* table_scan */,
"potential_range_indices": [
{
"index": "PRIMARY",
"usable": true,
"key_parts": [
"pk"
] /* key_parts */
}
] /* potential_range_indices */,
"setup_range_conditions": [
] /* setup_range_conditions */,
"group_index_range": {
"chosen": false,
It is not possible to use GROUP_MIN_MAX here because it can handle only a single table, and we have two in the join:
"cause": "not_single_table"
} /* group_index_range */
No range access is possible.
} /* range_analysis */
}
] /* records_estimation */
},
{
Next comes the search for an optimal table order (greedy search); since this is a STRAIGHT_JOIN, only the requested order is explored, and access methods are selected (a non-STRAIGHT_JOIN variant is sketched after this step):
"considered_execution_plans": [
{
"database": "test",
"table": "alias1",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "scan",
"records": 20,
"cost": 2.0977,
"chosen": true
A table scan is chosen.
}
] /* considered_access_paths */
} /* best_access_path */,
"cost_for_plan": 6.0977,
"records_for_plan": 20,
We estimate that reading the first table, and applying any conditions attached to it, will yield 20 rows; the cost for the plan so far (6.0977) is the scan cost (2.0977) plus a per-row evaluation charge for those 20 rows.
"rest_of_plan": [
{
"database": "test",
"table": "alias2",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "ref",
"index": "PRIMARY",
"records": 1,
"cost": 20.2,
"chosen": true
We choose ref access on the primary key of alias2.
},
{
"access_type": "scan",
"using_join_cache": true,
"records": 75,
"cost": 7.4917,
"chosen": false
But not a table scan, because its estimated number of records (75) is far greater than that of the ref access (1).
}
] /* considered_access_paths */
} /* best_access_path */,
"cost_for_plan": 30.098,
"records_for_plan": 20,
"chosen": true
}
] /* rest_of_plan */
}
] /* considered_execution_plans */
},
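For comparison, with a plain JOIN instead of STRAIGHT_JOIN (a hypothetical variant, not part of the test above), the greedy search would be free to also explore the plan starting with alias2, subject to its pruning heuristics:
SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
JOIN t2 AS alias2 ON alias2.pk = alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk ;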
{
Now that the order of tables is fixed, we can split the WHERE condition into chunks that can be tested as early as possible ("pushing conditions down the join tree"):
"attaching_conditions_to_tables": {
"original_condition": "((`test`.`alias2`.`pk` = \
`test`.`alias1`.`col_int_key`) and `test`.`alias1`.`pk`)",
"attached_conditions_computation": [
] /* attached_conditions_computation */,
"attached_conditions_summary": [
{
"database": "test",
"table": "alias1",
"attached": "(`test`.`alias1`.`pk` and \
(`test`.`alias1`.`col_int_key` is not null))"
The condition above can be tested on rows of alias1 without even reading rows of alias2; note the added "is not null" test, a consequence of the NULL-rejecting ref access seen earlier.
},
{
"database": "test",
"table": "alias2",
"attached": null
}
] /* attached_conditions_summary */
} /* attaching_conditions_to_tables */
},
{
Now we try to simplify ORDER BY:
"clause_processing": {
"clause": "ORDER BY",
"original_clause": "`test`.`alias1`.`col_int_key`,`test`.`alias2`.`pk`",
"items": [
{
"item": "`test`.`alias1`.`col_int_key`"
},
{
"item": "`test`.`alias2`.`pk`",
"eq_ref_to_preceding_items": true
Because the WHERE clause contains `alias2`.`pk` = `alias1`.`col_int_key`, ordering by both columns is wasteful: we can order by the first column alone, since the second will always be equal to it.
}
] /* items */,
"resulting_clause_is_simple": true,
"resulting_clause": "`test`.`alias1`.`col_int_key`"
So we get a shorter ORDER BY clause, and this is not visible in EXPLAIN or EXPLAIN EXTENDED (a way to check is sketched after this step). This simplification can be worthwhile: the shorter clause, being single-column and single-table, could be implemented by an index scan.
} /* clause_processing */
},
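To verify that this simplification stays hidden outside the trace, one could run the statement through EXPLAIN EXTENDED and look at the rewritten query (a sketch, not part of the test above):
EXPLAIN EXTENDED SELECT SUM(alias2.col_varchar_nokey) , alias2.pk AS field2 FROM t1 AS alias1
STRAIGHT_JOIN t2 AS alias2 ON alias2.pk = alias1.col_int_key WHERE alias1.pk
GROUP BY field2 ORDER BY alias1.col_int_key,alias2.pk ;
SHOW WARNINGS;
The statement printed by SHOW WARNINGS would still show the original two-column ORDER BY; only the trace reveals the simplified clause.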
{
"clause_processing": {
"clause": "GROUP BY",
"original_clause": "`test`.`alias2`.`pk`",
"items": [
{
"item": "`test`.`alias2`.`pk`"
}
] /* items */,
"resulting_clause_is_simple": false,
"resulting_clause": "`test`.`alias2`.`pk`"
} /* clause_processing */
},
{
"refine_plan": [
{
"database": "test",
"table": "alias1",
"scan_type": "table"
},
{
"database": "test",
"table": "alias2"
}
] /* refine_plan */
}
] /* steps */
} /* join_optimization */
},
{
Now the join is executed; nothing interesting is traced here:
"join_execution": {
"select#": 1,
"steps": [
] /* steps */
} /* join_execution */
}
] /* steps */
} 0 0
The two trailing zeros above are the last two columns of the OPTIMIZER_TRACE row: no trace bytes were lost to optimizer_trace_max_mem_size, and the user had sufficient privileges to view the trace. This was just an example. All traces have the same basic structure, but if a statement uses subqueries, there are several join preparation/optimization/execution blocks (one per select#), as well as subquery-specific transformations not shown here.
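For instance, a hypothetical statement with a subquery (not part of the test above), such as:
SELECT pk FROM t1 WHERE col_int_key IN (SELECT col_int_key FROM t2);
would yield a trace containing a join_preparation block for each select#, plus additional steps describing how the IN predicate is handled (for example a conversion to semi-join).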