00001 /* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB 00002 00003 This program is free software; you can redistribute it and/or modify 00004 it under the terms of the GNU General Public License as published by 00005 the Free Software Foundation; either version 2 of the License, or 00006 (at your option) any later version. 00007 00008 This program is distributed in the hope that it will be useful, 00009 but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00011 GNU General Public License for more details. 00012 00013 You should have received a copy of the GNU General Public License 00014 along with this program; if not, write to the Free Software 00015 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ 00016 00017 /* 00018 TODO: 00019 Fix that MAYBE_KEY are stored in the tree so that we can detect use 00020 of full hash keys for queries like: 00021 00022 select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205); 00023 00024 */ 00025 00026 /* 00027 This file contains: 00028 00029 RangeAnalysisModule 00030 A module that accepts a condition, index (or partitioning) description, 00031 and builds lists of intervals (in index/partitioning space), such that 00032 all possible records that match the condition are contained within the 00033 intervals. 00034 The entry point for the range analysis module is get_mm_tree() function. 00035 00036 The lists are returned in form of complicated structure of interlinked 00037 SEL_TREE/SEL_IMERGE/SEL_ARG objects. 00038 See check_quick_keys, find_used_partitions for examples of how to walk 00039 this structure. 00040 All direct "users" of this module are located within this file, too. 00041 00042 00043 PartitionPruningModule 00044 A module that accepts a partitioned table, condition, and finds which 00045 partitions we will need to use in query execution. 
Search down for 00046 "PartitionPruningModule" for description. 00047 The module has single entry point - prune_partitions() function. 00048 00049 00050 Range/index_merge/groupby-minmax optimizer module 00051 A module that accepts a table, condition, and returns 00052 - a QUICK_*_SELECT object that can be used to retrieve rows that match 00053 the specified condition, or a "no records will match the condition" 00054 statement. 00055 00056 The module entry points are 00057 test_quick_select() 00058 get_quick_select_for_ref() 00059 00060 00061 Record retrieval code for range/index_merge/groupby-min-max. 00062 Implementations of QUICK_*_SELECT classes. 00063 */ 00064 00065 #ifdef USE_PRAGMA_IMPLEMENTATION 00066 #pragma implementation // gcc: Class implementation 00067 #endif 00068 00069 #include "mysql_priv.h" 00070 #include <m_ctype.h> 00071 #include "sql_select.h" 00072 00073 #ifndef EXTRA_DEBUG 00074 #define test_rb_tree(A,B) {} 00075 #define test_use_count(A) {} 00076 #endif 00077 00078 /* 00079 Convert double value to #rows. Currently this does floor(), and we 00080 might consider using round() instead. 00081 */ 00082 #define double2rows(x) ((ha_rows)(x)) 00083 00084 static int sel_cmp(Field *f,char *a,char *b,uint8 a_flag,uint8 b_flag); 00085 00086 static char is_null_string[2]= {1,0}; 00087 00088 00089 /* 00090 A construction block of the SEL_ARG-graph. 00091 00092 The following description only covers graphs of SEL_ARG objects with 00093 sel_arg->type==KEY_RANGE: 00094 00095 One SEL_ARG object represents an "elementary interval" in form 00096 00097 min_value <=? table.keypartX <=? max_value 00098 00099 The interval is a non-empty interval of any kind: with[out] minimum/maximum 00100 bound, [half]open/closed, single-point interval, etc. 00101 00102 1. SEL_ARG GRAPH STRUCTURE 00103 00104 SEL_ARG objects are linked together in a graph. 
  The meaning of the graph
  is better demonstrated by an example:

     tree->keys[i]
      |
      |             $              $
      |    part=1   $     part=2   $    part=3
      |             $              $
      |  +-------+  $   +-------+  $   +--------+
      |  | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
      |  +-------+  $   +-------+  $   +--------+
      |      |      $              $       |
      |      |      $              $   +--------+
      |      |      $              $   | kp3=12 |
      |      |      $              $   +--------+
      |  +-------+  $              $
      \->| kp1=2 |--$--------------$-+
         +-------+  $              $ |   +--------+
             |      $              $  ==>| kp3=11 |
         +-------+  $              $ |   +--------+
         | kp1=3 |--$--------------$-+       |
         +-------+  $              $     +--------+
             |      $              $     | kp3=14 |
            ...     $              $     +--------+

  The entire graph is partitioned into "interval lists".

  An interval list is a sequence of ordered disjoint intervals over the same
  key part. SEL_ARG are linked via "next" and "prev" pointers. Additionally,
  all intervals in the list form an RB-tree, linked via left/right/parent
  pointers. The RB-tree root SEL_ARG object will be further called "root of the
  interval list".

    In the example pic, there are 4 interval lists:
    "kp1<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=14".
    The vertical lines represent SEL_ARG::next/prev pointers.

  In an interval list, each member X may have SEL_ARG::next_key_part pointer
  pointing to the root of another interval list Y. The pointed interval list
  must cover a key part with greater number (i.e. Y->part > X->part).

    In the example pic, the next_key_part pointers are represented by
    horizontal lines.

  2. SEL_ARG GRAPH SEMANTICS

  It represents a condition in a special form (we don't have a name for it ATM)
  The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
00152 00153 For example, the picture represents the condition in form: 00154 (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR 00155 (kp1=2 AND (kp3=11 OR kp3=14)) OR 00156 (kp1=3 AND (kp3=11 OR kp3=14)) 00157 00158 00159 3. SEL_ARG GRAPH USE 00160 00161 Use get_mm_tree() to construct SEL_ARG graph from WHERE condition. 00162 Then walk the SEL_ARG graph and get a list of dijsoint ordered key 00163 intervals (i.e. intervals in form 00164 00165 (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K) 00166 00167 Those intervals can be used to access the index. The uses are in: 00168 - check_quick_select() - Walk the SEL_ARG graph and find an estimate of 00169 how many table records are contained within all 00170 intervals. 00171 - get_quick_select() - Walk the SEL_ARG, materialize the key intervals, 00172 and create QUICK_RANGE_SELECT object that will 00173 read records within these intervals. 00174 */ 00175 00176 class SEL_ARG :public Sql_alloc 00177 { 00178 public: 00179 uint8 min_flag,max_flag,maybe_flag; 00180 uint8 part; // Which key part 00181 uint8 maybe_null; 00182 /* 00183 Number of children of this element in the RB-tree, plus 1 for this 00184 element itself. 
00185 */ 00186 uint16 elements; 00187 /* 00188 Valid only for elements which are RB-tree roots: Number of times this 00189 RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by 00190 SEL_TREE::keys[i] or by a temporary SEL_ARG* variable) 00191 */ 00192 ulong use_count; 00193 00194 Field *field; 00195 char *min_value,*max_value; // Pointer to range 00196 00197 SEL_ARG *left,*right; /* R-B tree children */ 00198 SEL_ARG *next,*prev; /* Links for bi-directional interval list */ 00199 SEL_ARG *parent; /* R-B tree parent */ 00200 SEL_ARG *next_key_part; 00201 enum leaf_color { BLACK,RED } color; 00202 enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type; 00203 00204 SEL_ARG() {} 00205 SEL_ARG(SEL_ARG &); 00206 SEL_ARG(Field *,const char *,const char *); 00207 SEL_ARG(Field *field, uint8 part, char *min_value, char *max_value, 00208 uint8 min_flag, uint8 max_flag, uint8 maybe_flag); 00209 SEL_ARG(enum Type type_arg) 00210 :min_flag(0),elements(1),use_count(1),left(0),next_key_part(0), 00211 color(BLACK), type(type_arg) 00212 {} 00213 inline bool is_same(SEL_ARG *arg) 00214 { 00215 if (type != arg->type || part != arg->part) 00216 return 0; 00217 if (type != KEY_RANGE) 00218 return 1; 00219 return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0; 00220 } 00221 inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; } 00222 inline void maybe_smaller() { maybe_flag=1; } 00223 inline int cmp_min_to_min(SEL_ARG* arg) 00224 { 00225 return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag); 00226 } 00227 inline int cmp_min_to_max(SEL_ARG* arg) 00228 { 00229 return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag); 00230 } 00231 inline int cmp_max_to_max(SEL_ARG* arg) 00232 { 00233 return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag); 00234 } 00235 inline int cmp_max_to_min(SEL_ARG* arg) 00236 { 00237 return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag); 00238 } 00239 
SEL_ARG *clone_and(SEL_ARG* arg) 00240 { // Get overlapping range 00241 char *new_min,*new_max; 00242 uint8 flag_min,flag_max; 00243 if (cmp_min_to_min(arg) >= 0) 00244 { 00245 new_min=min_value; flag_min=min_flag; 00246 } 00247 else 00248 { 00249 new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */ 00250 } 00251 if (cmp_max_to_max(arg) <= 0) 00252 { 00253 new_max=max_value; flag_max=max_flag; 00254 } 00255 else 00256 { 00257 new_max=arg->max_value; flag_max=arg->max_flag; 00258 } 00259 return new SEL_ARG(field, part, new_min, new_max, flag_min, flag_max, 00260 test(maybe_flag && arg->maybe_flag)); 00261 } 00262 SEL_ARG *clone_first(SEL_ARG *arg) 00263 { // min <= X < arg->min 00264 return new SEL_ARG(field,part, min_value, arg->min_value, 00265 min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX, 00266 maybe_flag | arg->maybe_flag); 00267 } 00268 SEL_ARG *clone_last(SEL_ARG *arg) 00269 { // min <= X <= key_max 00270 return new SEL_ARG(field, part, min_value, arg->max_value, 00271 min_flag, arg->max_flag, maybe_flag | arg->maybe_flag); 00272 } 00273 SEL_ARG *clone(SEL_ARG *new_parent,SEL_ARG **next); 00274 00275 bool copy_min(SEL_ARG* arg) 00276 { // Get overlapping range 00277 if (cmp_min_to_min(arg) > 0) 00278 { 00279 min_value=arg->min_value; min_flag=arg->min_flag; 00280 if ((max_flag & (NO_MAX_RANGE | NO_MIN_RANGE)) == 00281 (NO_MAX_RANGE | NO_MIN_RANGE)) 00282 return 1; // Full range 00283 } 00284 maybe_flag|=arg->maybe_flag; 00285 return 0; 00286 } 00287 bool copy_max(SEL_ARG* arg) 00288 { // Get overlapping range 00289 if (cmp_max_to_max(arg) <= 0) 00290 { 00291 max_value=arg->max_value; max_flag=arg->max_flag; 00292 if ((max_flag & (NO_MAX_RANGE | NO_MIN_RANGE)) == 00293 (NO_MAX_RANGE | NO_MIN_RANGE)) 00294 return 1; // Full range 00295 } 00296 maybe_flag|=arg->maybe_flag; 00297 return 0; 00298 } 00299 00300 void copy_min_to_min(SEL_ARG *arg) 00301 { 00302 min_value=arg->min_value; min_flag=arg->min_flag; 00303 } 00304 void 
copy_min_to_max(SEL_ARG *arg) 00305 { 00306 max_value=arg->min_value; 00307 max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX; 00308 } 00309 void copy_max_to_min(SEL_ARG *arg) 00310 { 00311 min_value=arg->max_value; 00312 min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN; 00313 } 00314 void store_min(uint length,char **min_key,uint min_key_flag) 00315 { 00316 if ((min_flag & GEOM_FLAG) || 00317 (!(min_flag & NO_MIN_RANGE) && 00318 !(min_key_flag & (NO_MIN_RANGE | NEAR_MIN)))) 00319 { 00320 if (maybe_null && *min_value) 00321 { 00322 **min_key=1; 00323 bzero(*min_key+1,length-1); 00324 } 00325 else 00326 memcpy(*min_key,min_value,length); 00327 (*min_key)+= length; 00328 } 00329 } 00330 void store(uint length,char **min_key,uint min_key_flag, 00331 char **max_key, uint max_key_flag) 00332 { 00333 store_min(length, min_key, min_key_flag); 00334 if (!(max_flag & NO_MAX_RANGE) && 00335 !(max_key_flag & (NO_MAX_RANGE | NEAR_MAX))) 00336 { 00337 if (maybe_null && *max_value) 00338 { 00339 **max_key=1; 00340 bzero(*max_key+1,length-1); 00341 } 00342 else 00343 memcpy(*max_key,max_value,length); 00344 (*max_key)+= length; 00345 } 00346 } 00347 00348 void store_min_key(KEY_PART *key,char **range_key, uint *range_key_flag) 00349 { 00350 SEL_ARG *key_tree= first(); 00351 key_tree->store(key[key_tree->part].store_length, 00352 range_key,*range_key_flag,range_key,NO_MAX_RANGE); 00353 *range_key_flag|= key_tree->min_flag; 00354 if (key_tree->next_key_part && 00355 key_tree->next_key_part->part == key_tree->part+1 && 00356 !(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)) && 00357 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) 00358 key_tree->next_key_part->store_min_key(key,range_key, range_key_flag); 00359 } 00360 00361 void store_max_key(KEY_PART *key,char **range_key, uint *range_key_flag) 00362 { 00363 SEL_ARG *key_tree= last(); 00364 key_tree->store(key[key_tree->part].store_length, 00365 range_key, NO_MIN_RANGE, range_key,*range_key_flag); 00366 (*range_key_flag)|= 
key_tree->max_flag; 00367 if (key_tree->next_key_part && 00368 key_tree->next_key_part->part == key_tree->part+1 && 00369 !(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)) && 00370 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) 00371 key_tree->next_key_part->store_max_key(key,range_key, range_key_flag); 00372 } 00373 00374 SEL_ARG *insert(SEL_ARG *key); 00375 SEL_ARG *tree_delete(SEL_ARG *key); 00376 SEL_ARG *find_range(SEL_ARG *key); 00377 SEL_ARG *rb_insert(SEL_ARG *leaf); 00378 friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par); 00379 #ifdef EXTRA_DEBUG 00380 friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent); 00381 void test_use_count(SEL_ARG *root); 00382 #endif 00383 SEL_ARG *first(); 00384 SEL_ARG *last(); 00385 void make_root(); 00386 inline bool simple_key() 00387 { 00388 return !next_key_part && elements == 1; 00389 } 00390 void increment_use_count(long count) 00391 { 00392 if (next_key_part) 00393 { 00394 next_key_part->use_count+=count; 00395 count*= (next_key_part->use_count-count); 00396 for (SEL_ARG *pos=next_key_part->first(); pos ; pos=pos->next) 00397 if (pos->next_key_part) 00398 pos->increment_use_count(count); 00399 } 00400 } 00401 void free_tree() 00402 { 00403 for (SEL_ARG *pos=first(); pos ; pos=pos->next) 00404 if (pos->next_key_part) 00405 { 00406 pos->next_key_part->use_count--; 00407 pos->next_key_part->free_tree(); 00408 } 00409 } 00410 00411 inline SEL_ARG **parent_ptr() 00412 { 00413 return parent->left == this ? &parent->left : &parent->right; 00414 } 00415 SEL_ARG *clone_tree(); 00416 00417 00418 /* 00419 Check if this SEL_ARG object represents a single-point interval 00420 00421 SYNOPSIS 00422 is_singlepoint() 00423 00424 DESCRIPTION 00425 Check if this SEL_ARG object (not tree) represents a single-point 00426 interval, i.e. if it represents a "keypart = const" or 00427 "keypart IS NULL". 
00428 00429 RETURN 00430 TRUE This SEL_ARG object represents a singlepoint interval 00431 FALSE Otherwise 00432 */ 00433 00434 bool is_singlepoint() 00435 { 00436 /* 00437 Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field) 00438 flags, and the same for right edge. 00439 */ 00440 if (min_flag || max_flag) 00441 return FALSE; 00442 byte *min_val= (byte *)min_value; 00443 byte *max_val= (byte *)max_value; 00444 00445 if (maybe_null) 00446 { 00447 /* First byte is a NULL value indicator */ 00448 if (*min_val != *max_val) 00449 return FALSE; 00450 00451 if (*min_val) 00452 return TRUE; /* This "x IS NULL" */ 00453 min_val++; 00454 max_val++; 00455 } 00456 return !field->key_cmp(min_val, max_val); 00457 } 00458 }; 00459 00460 class SEL_IMERGE; 00461 00462 00463 class SEL_TREE :public Sql_alloc 00464 { 00465 public: 00466 /* 00467 Starting an effort to document this field: 00468 (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) => 00469 (type == SEL_TREE::IMPOSSIBLE) 00470 */ 00471 enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type; 00472 SEL_TREE(enum Type type_arg) :type(type_arg) {} 00473 SEL_TREE() :type(KEY) 00474 { 00475 keys_map.clear_all(); 00476 bzero((char*) keys,sizeof(keys)); 00477 } 00478 /* 00479 Note: there may exist SEL_TREE objects with sel_tree->type=KEY and 00480 keys[i]=0 for all i. (SergeyP: it is not clear whether there is any 00481 merit in range analyzer functions (e.g. get_mm_parts) returning a 00482 pointer to such SEL_TREE instead of NULL) 00483 */ 00484 SEL_ARG *keys[MAX_KEY]; 00485 key_map keys_map; /* bitmask of non-NULL elements in keys */ 00486 00487 /* 00488 Possible ways to read rows using index_merge. The list is non-empty only 00489 if type==KEY. Currently can be non empty only if keys_map.is_clear_all(). 
00490 */ 00491 List<SEL_IMERGE> merges; 00492 00493 /* The members below are filled/used only after get_mm_tree is done */ 00494 key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */ 00495 uint n_ror_scans; /* number of set bits in ror_scans_map */ 00496 00497 struct st_ror_scan_info **ror_scans; /* list of ROR key scans */ 00498 struct st_ror_scan_info **ror_scans_end; /* last ROR scan */ 00499 /* Note that #records for each key scan is stored in table->quick_rows */ 00500 }; 00501 00502 class RANGE_OPT_PARAM 00503 { 00504 public: 00505 THD *thd; /* Current thread handle */ 00506 TABLE *table; /* Table being analyzed */ 00507 COND *cond; /* Used inside get_mm_tree(). */ 00508 table_map prev_tables; 00509 table_map read_tables; 00510 table_map current_table; /* Bit of the table being analyzed */ 00511 00512 /* Array of parts of all keys for which range analysis is performed */ 00513 KEY_PART *key_parts; 00514 KEY_PART *key_parts_end; 00515 MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */ 00516 MEM_ROOT *old_root; /* Memory that will last until the query end */ 00517 /* 00518 Number of indexes used in range analysis (In SEL_TREE::keys only first 00519 #keys elements are not empty) 00520 */ 00521 uint keys; 00522 00523 /* 00524 If true, the index descriptions describe real indexes (and it is ok to 00525 call field->optimize_range(real_keynr[...], ...). 00526 Otherwise index description describes fake indexes. 00527 */ 00528 bool using_real_indexes; 00529 00530 bool remove_jump_scans; 00531 00532 /* 00533 used_key_no -> table_key_no translation table. 
Only makes sense if 00534 using_real_indexes==TRUE 00535 */ 00536 uint real_keynr[MAX_KEY]; 00537 }; 00538 00539 class PARAM : public RANGE_OPT_PARAM 00540 { 00541 public: 00542 KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ 00543 uint baseflag, max_key_part, range_count; 00544 00545 00546 char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH], 00547 max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; 00548 bool quick; // Don't calulate possible keys 00549 00550 uint fields_bitmap_size; 00551 MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ 00552 00553 key_map *needed_reg; /* ptr to SQL_SELECT::needed_reg */ 00554 00555 uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ 00556 uint imerge_cost_buff_size; /* size of the buffer */ 00557 00558 /* TRUE if last checked tree->key can be used for ROR-scan */ 00559 bool is_ror_scan; 00560 /* Number of ranges in the last checked tree->key */ 00561 uint n_ranges; 00562 }; 00563 00564 class TABLE_READ_PLAN; 00565 class TRP_RANGE; 00566 class TRP_ROR_INTERSECT; 00567 class TRP_ROR_UNION; 00568 class TRP_ROR_INDEX_MERGE; 00569 class TRP_GROUP_MIN_MAX; 00570 00571 struct st_ror_scan_info; 00572 00573 static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, 00574 Item_func::Functype type,Item *value, 00575 Item_result cmp_type); 00576 static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, 00577 KEY_PART *key_part, 00578 Item_func::Functype type,Item *value); 00579 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond); 00580 00581 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); 00582 static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree, 00583 bool update_tbl_stats); 00584 static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree, 00585 char *min_key,uint min_key_flag, 00586 char *max_key, uint max_key_flag); 00587 00588 QUICK_RANGE_SELECT *get_quick_select(PARAM 
*param,uint index, 00589 SEL_ARG *key_tree, 00590 MEM_ROOT *alloc = NULL); 00591 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, 00592 bool index_read_must_be_used, 00593 bool update_tbl_stats, 00594 double read_time); 00595 static 00596 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, 00597 double read_time, 00598 bool *are_all_covering); 00599 static 00600 TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, 00601 SEL_TREE *tree, 00602 double read_time); 00603 static 00604 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, 00605 double read_time); 00606 static 00607 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree); 00608 static int get_index_merge_params(PARAM *param, key_map& needed_reg, 00609 SEL_IMERGE *imerge, double *read_time, 00610 ha_rows* imerge_rows); 00611 static double get_index_only_read_time(const PARAM* param, ha_rows records, 00612 int keynr); 00613 00614 #ifndef DBUG_OFF 00615 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, 00616 const char *msg); 00617 static void print_ror_scans_arr(TABLE *table, const char *msg, 00618 struct st_ror_scan_info **start, 00619 struct st_ror_scan_info **end); 00620 static void print_rowid(byte* val, int len); 00621 static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); 00622 #endif 00623 00624 static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); 00625 static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); 00626 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); 00627 static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2); 00628 static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag); 00629 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1); 00630 bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key, 00631 SEL_ARG *key_tree,char *min_key,uint min_key_flag, 
00632 char *max_key,uint max_key_flag); 00633 static bool eq_tree(SEL_ARG* a,SEL_ARG *b); 00634 00635 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE); 00636 static bool null_part_in_key(KEY_PART *key_part, const char *key, 00637 uint length); 00638 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param); 00639 00640 00641 /* 00642 SEL_IMERGE is a list of possible ways to do index merge, i.e. it is 00643 a condition in the following form: 00644 (t_1||t_2||...||t_N) && (next) 00645 00646 where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair 00647 (t_i,t_j) contains SEL_ARGS for the same index. 00648 00649 SEL_TREE contained in SEL_IMERGE always has merges=NULL. 00650 00651 This class relies on memory manager to do the cleanup. 00652 */ 00653 00654 class SEL_IMERGE : public Sql_alloc 00655 { 00656 enum { PREALLOCED_TREES= 10}; 00657 public: 00658 SEL_TREE *trees_prealloced[PREALLOCED_TREES]; 00659 SEL_TREE **trees; /* trees used to do index_merge */ 00660 SEL_TREE **trees_next; /* last of these trees */ 00661 SEL_TREE **trees_end; /* end of allocated space */ 00662 00663 SEL_ARG ***best_keys; /* best keys to read in SEL_TREEs */ 00664 00665 SEL_IMERGE() : 00666 trees(&trees_prealloced[0]), 00667 trees_next(trees), 00668 trees_end(trees + PREALLOCED_TREES) 00669 {} 00670 int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree); 00671 int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree); 00672 int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge); 00673 }; 00674 00675 00676 /* 00677 Add SEL_TREE to this index_merge without any checks, 00678 00679 NOTES 00680 This function implements the following: 00681 (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs 00682 00683 RETURN 00684 0 - OK 00685 -1 - Out of memory. 
00686 */ 00687 00688 int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree) 00689 { 00690 if (trees_next == trees_end) 00691 { 00692 const int realloc_ratio= 2; /* Double size for next round */ 00693 uint old_elements= (trees_end - trees); 00694 uint old_size= sizeof(SEL_TREE**) * old_elements; 00695 uint new_size= old_size * realloc_ratio; 00696 SEL_TREE **new_trees; 00697 if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size))) 00698 return -1; 00699 memcpy(new_trees, trees, old_size); 00700 trees= new_trees; 00701 trees_next= trees + old_elements; 00702 trees_end= trees + old_elements * realloc_ratio; 00703 } 00704 *(trees_next++)= tree; 00705 return 0; 00706 } 00707 00708 00709 /* 00710 Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree, 00711 combining new_tree with one of the trees in this SEL_IMERGE if they both 00712 have SEL_ARGs for the same key. 00713 00714 SYNOPSIS 00715 or_sel_tree_with_checks() 00716 param PARAM from SQL_SELECT::test_quick_select 00717 new_tree SEL_TREE with type KEY or KEY_SMALLER. 00718 00719 NOTES 00720 This does the following: 00721 (t_1||...||t_k)||new_tree = 00722 either 00723 = (t_1||...||t_k||new_tree) 00724 or 00725 = (t_1||....||(t_j|| new_tree)||...||t_k), 00726 00727 where t_i, y are SEL_TREEs. 00728 new_tree is combined with the first t_j it has a SEL_ARG on common 00729 key with. As a consequence of this, choice of keys to do index_merge 00730 read may depend on the order of conditions in WHERE part of the query. 00731 00732 RETURN 00733 0 OK 00734 1 One of the trees was combined with new_tree to SEL_TREE::ALWAYS, 00735 and (*this) should be discarded. 00736 -1 An error occurred. 
00737 */ 00738 00739 int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree) 00740 { 00741 for (SEL_TREE** tree = trees; 00742 tree != trees_next; 00743 tree++) 00744 { 00745 if (sel_trees_can_be_ored(*tree, new_tree, param)) 00746 { 00747 *tree = tree_or(param, *tree, new_tree); 00748 if (!*tree) 00749 return 1; 00750 if (((*tree)->type == SEL_TREE::MAYBE) || 00751 ((*tree)->type == SEL_TREE::ALWAYS)) 00752 return 1; 00753 /* SEL_TREE::IMPOSSIBLE is impossible here */ 00754 return 0; 00755 } 00756 } 00757 00758 /* New tree cannot be combined with any of existing trees. */ 00759 return or_sel_tree(param, new_tree); 00760 } 00761 00762 00763 /* 00764 Perform OR operation on this index_merge and supplied index_merge list. 00765 00766 RETURN 00767 0 - OK 00768 1 - One of conditions in result is always TRUE and this SEL_IMERGE 00769 should be discarded. 00770 -1 - An error occurred 00771 */ 00772 00773 int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge) 00774 { 00775 for (SEL_TREE** tree= imerge->trees; 00776 tree != imerge->trees_next; 00777 tree++) 00778 { 00779 if (or_sel_tree_with_checks(param, *tree)) 00780 return 1; 00781 } 00782 return 0; 00783 } 00784 00785 00786 /* 00787 Perform AND operation on two index_merge lists and store result in *im1. 00788 */ 00789 00790 inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) 00791 { 00792 im1->concat(im2); 00793 } 00794 00795 00796 /* 00797 Perform OR operation on 2 index_merge lists, storing result in first list. 00798 00799 NOTES 00800 The following conversion is implemented: 00801 (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) => 00802 => (a_1||b_1). 00803 00804 i.e. all conjuncts except the first one are currently dropped. 00805 This is done to avoid producing N*K ways to do index_merge. 
00806 00807 If (a_1||b_1) produce a condition that is always TRUE, NULL is returned 00808 and index_merge is discarded (while it is actually possible to try 00809 harder). 00810 00811 As a consequence of this, choice of keys to do index_merge read may depend 00812 on the order of conditions in WHERE part of the query. 00813 00814 RETURN 00815 0 OK, result is stored in *im1 00816 other Error, both passed lists are unusable 00817 */ 00818 00819 int imerge_list_or_list(RANGE_OPT_PARAM *param, 00820 List<SEL_IMERGE> *im1, 00821 List<SEL_IMERGE> *im2) 00822 { 00823 SEL_IMERGE *imerge= im1->head(); 00824 im1->empty(); 00825 im1->push_back(imerge); 00826 00827 return imerge->or_sel_imerge_with_checks(param, im2->head()); 00828 } 00829 00830 00831 /* 00832 Perform OR operation on index_merge list and key tree. 00833 00834 RETURN 00835 0 OK, result is stored in *im1. 00836 other Error 00837 */ 00838 00839 int imerge_list_or_tree(RANGE_OPT_PARAM *param, 00840 List<SEL_IMERGE> *im1, 00841 SEL_TREE *tree) 00842 { 00843 SEL_IMERGE *imerge; 00844 List_iterator<SEL_IMERGE> it(*im1); 00845 while ((imerge= it++)) 00846 { 00847 if (imerge->or_sel_tree_with_checks(param, tree)) 00848 it.remove(); 00849 } 00850 return im1->is_empty(); 00851 } 00852 00853 /*************************************************************************** 00854 ** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT 00855 ***************************************************************************/ 00856 00857 /* make a select from mysql info 00858 Error is set as following: 00859 0 = ok 00860 1 = Got some error (out of memory?) 
00861 */ 00862 00863 SQL_SELECT *make_select(TABLE *head, table_map const_tables, 00864 table_map read_tables, COND *conds, 00865 bool allow_null_cond, 00866 int *error) 00867 { 00868 SQL_SELECT *select; 00869 DBUG_ENTER("make_select"); 00870 00871 *error=0; 00872 00873 if (!conds && !allow_null_cond) 00874 DBUG_RETURN(0); 00875 if (!(select= new SQL_SELECT)) 00876 { 00877 *error= 1; // out of memory 00878 DBUG_RETURN(0); /* purecov: inspected */ 00879 } 00880 select->read_tables=read_tables; 00881 select->const_tables=const_tables; 00882 select->head=head; 00883 select->cond=conds; 00884 00885 if (head->sort.io_cache) 00886 { 00887 select->file= *head->sort.io_cache; 00888 select->records=(ha_rows) (select->file.end_of_file/ 00889 head->file->ref_length); 00890 my_free((gptr) (head->sort.io_cache),MYF(0)); 00891 head->sort.io_cache=0; 00892 } 00893 DBUG_RETURN(select); 00894 } 00895 00896 00897 SQL_SELECT::SQL_SELECT() :quick(0),cond(0),free_cond(0) 00898 { 00899 quick_keys.clear_all(); needed_reg.clear_all(); 00900 my_b_clear(&file); 00901 } 00902 00903 00904 void SQL_SELECT::cleanup() 00905 { 00906 delete quick; 00907 quick= 0; 00908 if (free_cond) 00909 { 00910 free_cond=0; 00911 delete cond; 00912 cond= 0; 00913 } 00914 close_cached_file(&file); 00915 } 00916 00917 00918 SQL_SELECT::~SQL_SELECT() 00919 { 00920 cleanup(); 00921 } 00922 00923 #undef index // Fix for Unixware 7 00924 00925 QUICK_SELECT_I::QUICK_SELECT_I() 00926 :max_used_key_length(0), 00927 used_key_parts(0) 00928 {} 00929 00930 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, 00931 bool no_alloc, MEM_ROOT *parent_alloc) 00932 :dont_free(0),error(0),free_file(0),in_range(0),cur_range(NULL),range(0) 00933 { 00934 my_bitmap_map *bitmap; 00935 DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT"); 00936 00937 in_ror_merged_scan= 0; 00938 sorted= 0; 00939 index= key_nr; 00940 head= table; 00941 key_part_info= head->key_info[index].key_part; 00942 
my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16); 00943 00944 /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */ 00945 multi_range_bufsiz= thd->variables.read_rnd_buff_size; 00946 multi_range_count= thd->variables.multi_range_count; 00947 multi_range_length= 0; 00948 multi_range= NULL; 00949 multi_range_buff= NULL; 00950 00951 if (!no_alloc && !parent_alloc) 00952 { 00953 // Allocates everything through the internal memroot 00954 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); 00955 thd->mem_root= &alloc; 00956 } 00957 else 00958 bzero((char*) &alloc,sizeof(alloc)); 00959 file= head->file; 00960 record= head->record[0]; 00961 save_read_set= head->read_set; 00962 save_write_set= head->write_set; 00963 00964 /* Allocate a bitmap for used columns */ 00965 if (!(bitmap= (my_bitmap_map*) my_malloc(head->s->column_bitmap_size, 00966 MYF(MY_WME)))) 00967 { 00968 column_bitmap.bitmap= 0; 00969 error= 1; 00970 } 00971 else 00972 bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE); 00973 DBUG_VOID_RETURN; 00974 } 00975 00976 00977 int QUICK_RANGE_SELECT::init() 00978 { 00979 DBUG_ENTER("QUICK_RANGE_SELECT::init"); 00980 00981 if (file->inited != handler::NONE) 00982 file->ha_index_or_rnd_end(); 00983 DBUG_RETURN(error= file->ha_index_init(index, 1)); 00984 } 00985 00986 00987 void QUICK_RANGE_SELECT::range_end() 00988 { 00989 if (file->inited != handler::NONE) 00990 file->ha_index_or_rnd_end(); 00991 } 00992 00993 00994 QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() 00995 { 00996 DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT"); 00997 if (!dont_free) 00998 { 00999 /* file is NULL for CPK scan on covering ROR-intersection */ 01000 if (file) 01001 { 01002 range_end(); 01003 if (free_file) 01004 { 01005 DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file, 01006 free_file)); 01007 file->ha_external_lock(current_thd, F_UNLCK); 01008 file->close(); 01009 delete file; 01010 } 01011 else 01012 { 01013 
file->extra(HA_EXTRA_NO_KEYREAD); 01014 } 01015 } 01016 delete_dynamic(&ranges); /* ranges are allocated in alloc */ 01017 free_root(&alloc,MYF(0)); 01018 my_free((char*) column_bitmap.bitmap, MYF(MY_ALLOW_ZERO_PTR)); 01019 } 01020 head->column_bitmaps_set(save_read_set, save_write_set); 01021 x_free(multi_range); 01022 x_free(multi_range_buff); 01023 DBUG_VOID_RETURN; 01024 } 01025 01026 01027 QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param, 01028 TABLE *table) 01029 :pk_quick_select(NULL), thd(thd_param) 01030 { 01031 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT"); 01032 index= MAX_KEY; 01033 head= table; 01034 bzero(&read_record, sizeof(read_record)); 01035 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); 01036 DBUG_VOID_RETURN; 01037 } 01038 01039 int QUICK_INDEX_MERGE_SELECT::init() 01040 { 01041 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init"); 01042 DBUG_RETURN(0); 01043 } 01044 01045 int QUICK_INDEX_MERGE_SELECT::reset() 01046 { 01047 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset"); 01048 DBUG_RETURN(read_keys_and_merge()); 01049 } 01050 01051 bool 01052 QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range) 01053 { 01054 /* 01055 Save quick_select that does scan on clustered primary key as it will be 01056 processed separately. 
01057 */ 01058 if (head->file->primary_key_is_clustered() && 01059 quick_sel_range->index == head->s->primary_key) 01060 pk_quick_select= quick_sel_range; 01061 else 01062 return quick_selects.push_back(quick_sel_range); 01063 return 0; 01064 } 01065 01066 QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT() 01067 { 01068 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); 01069 QUICK_RANGE_SELECT* quick; 01070 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT"); 01071 quick_it.rewind(); 01072 while ((quick= quick_it++)) 01073 quick->file= NULL; 01074 quick_selects.delete_elements(); 01075 delete pk_quick_select; 01076 free_root(&alloc,MYF(0)); 01077 DBUG_VOID_RETURN; 01078 } 01079 01080 01081 QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, 01082 TABLE *table, 01083 bool retrieve_full_rows, 01084 MEM_ROOT *parent_alloc) 01085 : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows), 01086 scans_inited(FALSE) 01087 { 01088 index= MAX_KEY; 01089 head= table; 01090 record= head->record[0]; 01091 if (!parent_alloc) 01092 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); 01093 else 01094 bzero(&alloc, sizeof(MEM_ROOT)); 01095 last_rowid= (byte*)alloc_root(parent_alloc? parent_alloc : &alloc, 01096 head->file->ref_length); 01097 } 01098 01099 01100 /* 01101 Do post-constructor initialization. 01102 SYNOPSIS 01103 QUICK_ROR_INTERSECT_SELECT::init() 01104 01105 RETURN 01106 0 OK 01107 other Error code 01108 */ 01109 01110 int QUICK_ROR_INTERSECT_SELECT::init() 01111 { 01112 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init"); 01113 /* Check if last_rowid was successfully allocated in ctor */ 01114 DBUG_RETURN(!last_rowid); 01115 } 01116 01117 01118 /* 01119 Initialize this quick select to be a ROR-merged scan. 
01120 01121 SYNOPSIS 01122 QUICK_RANGE_SELECT::init_ror_merged_scan() 01123 reuse_handler If TRUE, use head->file, otherwise create a separate 01124 handler object 01125 01126 NOTES 01127 This function creates and prepares for subsequent use a separate handler 01128 object if it can't reuse head->file. The reason for this is that during 01129 ROR-merge several key scans are performed simultaneously, and a single 01130 handler is only capable of preserving context of a single key scan. 01131 01132 In ROR-merge the quick select doing merge does full records retrieval, 01133 merged quick selects read only keys. 01134 01135 RETURN 01136 0 ROR child scan initialized, ok to use. 01137 1 error 01138 */ 01139 01140 int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) 01141 { 01142 handler *save_file= file, *org_file; 01143 THD *thd; 01144 MY_BITMAP *bitmap; 01145 DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan"); 01146 01147 in_ror_merged_scan= 1; 01148 if (reuse_handler) 01149 { 01150 DBUG_PRINT("info", ("Reusing handler 0x%lx", (long) file)); 01151 if (init() || reset()) 01152 { 01153 DBUG_RETURN(1); 01154 } 01155 head->column_bitmaps_set(&column_bitmap, &column_bitmap); 01156 goto end; 01157 } 01158 01159 /* Create a separate handler object for this quick select */ 01160 if (free_file) 01161 { 01162 /* already have own 'handler' object. 
*/ 01163 DBUG_RETURN(0); 01164 } 01165 01166 thd= head->in_use; 01167 if (!(file= get_new_handler(head->s, thd->mem_root, head->s->db_type))) 01168 goto failure; 01169 DBUG_PRINT("info", ("Allocated new handler 0x%lx", (long) file)); 01170 if (file->ha_open(head, head->s->normalized_path.str, head->db_stat, 01171 HA_OPEN_IGNORE_IF_LOCKED)) 01172 { 01173 /* Caller will free the memory */ 01174 goto failure; 01175 } 01176 01177 head->column_bitmaps_set(&column_bitmap, &column_bitmap); 01178 01179 if (file->ha_external_lock(thd, F_RDLCK)) 01180 goto failure; 01181 01182 if (init() || reset()) 01183 { 01184 file->ha_external_lock(thd, F_UNLCK); 01185 file->close(); 01186 goto failure; 01187 } 01188 free_file= TRUE; 01189 last_rowid= file->ref; 01190 01191 end: 01192 /* 01193 We are only going to read key fields and call position() on 'file' 01194 The following sets head->tmp_set to only use this key and then updates 01195 head->read_set and head->write_set to use this bitmap. 01196 The now bitmap is stored in 'column_bitmap' which is used in ::get_next() 01197 */ 01198 org_file= head->file; 01199 head->file= file; 01200 /* We don't have to set 'head->keyread' here as the 'file' is unique */ 01201 head->mark_columns_used_by_index(index); 01202 head->prepare_for_position(); 01203 head->file= org_file; 01204 bitmap_copy(&column_bitmap, head->read_set); 01205 head->column_bitmaps_set(&column_bitmap, &column_bitmap); 01206 01207 DBUG_RETURN(0); 01208 01209 failure: 01210 head->column_bitmaps_set(save_read_set, save_write_set); 01211 delete file; 01212 file= save_file; 01213 DBUG_RETURN(1); 01214 } 01215 01216 01217 /* 01218 Initialize this quick select to be a part of a ROR-merged scan. 01219 SYNOPSIS 01220 QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan() 01221 reuse_handler If TRUE, use head->file, otherwise create separate 01222 handler object. 
01223 RETURN 01224 0 OK 01225 other error code 01226 */ 01227 int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler) 01228 { 01229 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); 01230 QUICK_RANGE_SELECT* quick; 01231 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan"); 01232 01233 /* Initialize all merged "children" quick selects */ 01234 DBUG_ASSERT(!need_to_fetch_row || reuse_handler); 01235 if (!need_to_fetch_row && reuse_handler) 01236 { 01237 quick= quick_it++; 01238 /* 01239 There is no use of this->file. Use it for the first of merged range 01240 selects. 01241 */ 01242 if (quick->init_ror_merged_scan(TRUE)) 01243 DBUG_RETURN(1); 01244 quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); 01245 } 01246 while ((quick= quick_it++)) 01247 { 01248 if (quick->init_ror_merged_scan(FALSE)) 01249 DBUG_RETURN(1); 01250 quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); 01251 /* All merged scans share the same record buffer in intersection. */ 01252 quick->record= head->record[0]; 01253 } 01254 01255 if (need_to_fetch_row && head->file->ha_rnd_init(1)) 01256 { 01257 DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); 01258 DBUG_RETURN(1); 01259 } 01260 DBUG_RETURN(0); 01261 } 01262 01263 01264 /* 01265 Initialize quick select for row retrieval. 01266 SYNOPSIS 01267 reset() 01268 RETURN 01269 0 OK 01270 other Error code 01271 */ 01272 01273 int QUICK_ROR_INTERSECT_SELECT::reset() 01274 { 01275 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset"); 01276 if (!scans_inited && init_ror_merged_scan(TRUE)) 01277 DBUG_RETURN(1); 01278 scans_inited= TRUE; 01279 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); 01280 QUICK_RANGE_SELECT *quick; 01281 while ((quick= it++)) 01282 quick->reset(); 01283 DBUG_RETURN(0); 01284 } 01285 01286 01287 /* 01288 Add a merged quick select to this ROR-intersection quick select. 
01289 01290 SYNOPSIS 01291 QUICK_ROR_INTERSECT_SELECT::push_quick_back() 01292 quick Quick select to be added. The quick select must return 01293 rows in rowid order. 01294 NOTES 01295 This call can only be made before init() is called. 01296 01297 RETURN 01298 FALSE OK 01299 TRUE Out of memory. 01300 */ 01301 01302 bool 01303 QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick) 01304 { 01305 return quick_selects.push_back(quick); 01306 } 01307 01308 QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() 01309 { 01310 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT"); 01311 quick_selects.delete_elements(); 01312 delete cpk_quick; 01313 free_root(&alloc,MYF(0)); 01314 if (need_to_fetch_row && head->file->inited != handler::NONE) 01315 head->file->ha_rnd_end(); 01316 DBUG_VOID_RETURN; 01317 } 01318 01319 01320 QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, 01321 TABLE *table) 01322 : thd(thd_param), scans_inited(FALSE) 01323 { 01324 index= MAX_KEY; 01325 head= table; 01326 rowid_length= table->file->ref_length; 01327 record= head->record[0]; 01328 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); 01329 thd_param->mem_root= &alloc; 01330 } 01331 01332 01333 /* 01334 Do post-constructor initialization. 
01335 SYNOPSIS 01336 QUICK_ROR_UNION_SELECT::init() 01337 01338 RETURN 01339 0 OK 01340 other Error code 01341 */ 01342 01343 int QUICK_ROR_UNION_SELECT::init() 01344 { 01345 DBUG_ENTER("QUICK_ROR_UNION_SELECT::init"); 01346 if (init_queue(&queue, quick_selects.elements, 0, 01347 FALSE , QUICK_ROR_UNION_SELECT::queue_cmp, 01348 (void*) this)) 01349 { 01350 bzero(&queue, sizeof(QUEUE)); 01351 DBUG_RETURN(1); 01352 } 01353 01354 if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length))) 01355 DBUG_RETURN(1); 01356 prev_rowid= cur_rowid + head->file->ref_length; 01357 DBUG_RETURN(0); 01358 } 01359 01360 01361 /* 01362 Comparison function to be used QUICK_ROR_UNION_SELECT::queue priority 01363 queue. 01364 01365 SYNPOSIS 01366 QUICK_ROR_UNION_SELECT::queue_cmp() 01367 arg Pointer to QUICK_ROR_UNION_SELECT 01368 val1 First merged select 01369 val2 Second merged select 01370 */ 01371 01372 int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2) 01373 { 01374 QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; 01375 return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid, 01376 ((QUICK_SELECT_I*)val2)->last_rowid); 01377 } 01378 01379 01380 /* 01381 Initialize quick select for row retrieval. 01382 SYNOPSIS 01383 reset() 01384 01385 RETURN 01386 0 OK 01387 other Error code 01388 */ 01389 01390 int QUICK_ROR_UNION_SELECT::reset() 01391 { 01392 QUICK_SELECT_I* quick; 01393 int error; 01394 DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset"); 01395 have_prev_rowid= FALSE; 01396 if (!scans_inited) 01397 { 01398 QUICK_SELECT_I *quick; 01399 List_iterator_fast<QUICK_SELECT_I> it(quick_selects); 01400 while ((quick= it++)) 01401 { 01402 if (quick->init_ror_merged_scan(FALSE)) 01403 DBUG_RETURN(1); 01404 } 01405 scans_inited= TRUE; 01406 } 01407 queue_remove_all(&queue); 01408 /* 01409 Initialize scans for merged quick selects and put all merged quick 01410 selects into the queue. 
01411 */ 01412 List_iterator_fast<QUICK_SELECT_I> it(quick_selects); 01413 while ((quick= it++)) 01414 { 01415 if (quick->reset()) 01416 DBUG_RETURN(1); 01417 if ((error= quick->get_next())) 01418 { 01419 if (error == HA_ERR_END_OF_FILE) 01420 continue; 01421 DBUG_RETURN(error); 01422 } 01423 quick->save_last_pos(); 01424 queue_insert(&queue, (byte*)quick); 01425 } 01426 01427 if (head->file->ha_rnd_init(1)) 01428 { 01429 DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); 01430 DBUG_RETURN(1); 01431 } 01432 01433 DBUG_RETURN(0); 01434 } 01435 01436 01437 bool 01438 QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range) 01439 { 01440 return quick_selects.push_back(quick_sel_range); 01441 } 01442 01443 QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT() 01444 { 01445 DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT"); 01446 delete_queue(&queue); 01447 quick_selects.delete_elements(); 01448 if (head->file->inited != handler::NONE) 01449 head->file->ha_rnd_end(); 01450 free_root(&alloc,MYF(0)); 01451 DBUG_VOID_RETURN; 01452 } 01453 01454 01455 QUICK_RANGE::QUICK_RANGE() 01456 :min_key(0),max_key(0),min_length(0),max_length(0), 01457 flag(NO_MIN_RANGE | NO_MAX_RANGE) 01458 {} 01459 01460 SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc() 01461 { 01462 type=arg.type; 01463 min_flag=arg.min_flag; 01464 max_flag=arg.max_flag; 01465 maybe_flag=arg.maybe_flag; 01466 maybe_null=arg.maybe_null; 01467 part=arg.part; 01468 field=arg.field; 01469 min_value=arg.min_value; 01470 max_value=arg.max_value; 01471 next_key_part=arg.next_key_part; 01472 use_count=1; elements=1; 01473 } 01474 01475 01476 inline void SEL_ARG::make_root() 01477 { 01478 left=right= &null_element; 01479 color=BLACK; 01480 next=prev=0; 01481 use_count=0; elements=1; 01482 } 01483 01484 SEL_ARG::SEL_ARG(Field *f,const char *min_value_arg,const char *max_value_arg) 01485 :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()), 01486 elements(1), use_count(1), 
field(f), min_value((char*) min_value_arg), 01487 max_value((char*) max_value_arg), next(0),prev(0), 01488 next_key_part(0),color(BLACK),type(KEY_RANGE) 01489 { 01490 left=right= &null_element; 01491 } 01492 01493 SEL_ARG::SEL_ARG(Field *field_,uint8 part_,char *min_value_,char *max_value_, 01494 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_) 01495 :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_), 01496 part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1), 01497 field(field_), min_value(min_value_), max_value(max_value_), 01498 next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE) 01499 { 01500 left=right= &null_element; 01501 } 01502 01503 SEL_ARG *SEL_ARG::clone(SEL_ARG *new_parent,SEL_ARG **next_arg) 01504 { 01505 SEL_ARG *tmp; 01506 if (type != KEY_RANGE) 01507 { 01508 if (!(tmp= new SEL_ARG(type))) 01509 return 0; // out of memory 01510 tmp->prev= *next_arg; // Link into next/prev chain 01511 (*next_arg)->next=tmp; 01512 (*next_arg)= tmp; 01513 } 01514 else 01515 { 01516 if (!(tmp= new SEL_ARG(field,part, min_value,max_value, 01517 min_flag, max_flag, maybe_flag))) 01518 return 0; // OOM 01519 tmp->parent=new_parent; 01520 tmp->next_key_part=next_key_part; 01521 if (left != &null_element) 01522 tmp->left=left->clone(tmp,next_arg); 01523 01524 tmp->prev= *next_arg; // Link into next/prev chain 01525 (*next_arg)->next=tmp; 01526 (*next_arg)= tmp; 01527 01528 if (right != &null_element) 01529 if (!(tmp->right= right->clone(tmp,next_arg))) 01530 return 0; // OOM 01531 } 01532 increment_use_count(1); 01533 tmp->color= color; 01534 tmp->elements= this->elements; 01535 return tmp; 01536 } 01537 01538 SEL_ARG *SEL_ARG::first() 01539 { 01540 SEL_ARG *next_arg=this; 01541 if (!next_arg->left) 01542 return 0; // MAYBE_KEY 01543 while (next_arg->left != &null_element) 01544 next_arg=next_arg->left; 01545 return next_arg; 01546 } 01547 01548 SEL_ARG *SEL_ARG::last() 01549 { 01550 SEL_ARG *next_arg=this; 01551 if 
(!next_arg->right) 01552 return 0; // MAYBE_KEY 01553 while (next_arg->right != &null_element) 01554 next_arg=next_arg->right; 01555 return next_arg; 01556 } 01557 01558 01559 /* 01560 Check if a compare is ok, when one takes ranges in account 01561 Returns -2 or 2 if the ranges where 'joined' like < 2 and >= 2 01562 */ 01563 01564 static int sel_cmp(Field *field, char *a,char *b,uint8 a_flag,uint8 b_flag) 01565 { 01566 int cmp; 01567 /* First check if there was a compare to a min or max element */ 01568 if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) 01569 { 01570 if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) == 01571 (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))) 01572 return 0; 01573 return (a_flag & NO_MIN_RANGE) ? -1 : 1; 01574 } 01575 if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) 01576 return (b_flag & NO_MIN_RANGE) ? 1 : -1; 01577 01578 if (field->real_maybe_null()) // If null is part of key 01579 { 01580 if (*a != *b) 01581 { 01582 return *a ? -1 : 1; 01583 } 01584 if (*a) 01585 goto end; // NULL where equal 01586 a++; b++; // Skip NULL marker 01587 } 01588 cmp=field->key_cmp((byte*) a,(byte*) b); 01589 if (cmp) return cmp < 0 ? -1 : 1; // The values differed 01590 01591 // Check if the compared equal arguments was defined with open/closed range 01592 end: 01593 if (a_flag & (NEAR_MIN | NEAR_MAX)) 01594 { 01595 if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX))) 01596 return 0; 01597 if (!(b_flag & (NEAR_MIN | NEAR_MAX))) 01598 return (a_flag & NEAR_MIN) ? 2 : -2; 01599 return (a_flag & NEAR_MIN) ? 1 : -1; 01600 } 01601 if (b_flag & (NEAR_MIN | NEAR_MAX)) 01602 return (b_flag & NEAR_MIN) ? 
-2 : 2; 01603 return 0; // The elements where equal 01604 } 01605 01606 01607 SEL_ARG *SEL_ARG::clone_tree() 01608 { 01609 SEL_ARG tmp_link,*next_arg,*root; 01610 next_arg= &tmp_link; 01611 root= clone((SEL_ARG *) 0, &next_arg); 01612 next_arg->next=0; // Fix last link 01613 tmp_link.next->prev=0; // Fix first link 01614 if (root) // If not OOM 01615 root->use_count= 0; 01616 return root; 01617 } 01618 01619 01620 /* 01621 Find the best index to retrieve first N records in given order 01622 01623 SYNOPSIS 01624 get_index_for_order() 01625 table Table to be accessed 01626 order Required ordering 01627 limit Number of records that will be retrieved 01628 01629 DESCRIPTION 01630 Find the best index that allows to retrieve first #limit records in the 01631 given order cheaper then one would retrieve them using full table scan. 01632 01633 IMPLEMENTATION 01634 Run through all table indexes and find the shortest index that allows 01635 records to be retrieved in given order. We look for the shortest index 01636 as we will have fewer index pages to read with it. 01637 01638 This function is used only by UPDATE/DELETE, so we take into account how 01639 the UPDATE/DELETE code will work: 01640 * index can only be scanned in forward direction 01641 * HA_EXTRA_KEYREAD will not be used 01642 Perhaps these assumptions could be relaxed 01643 01644 RETURN 01645 index number 01646 MAX_KEY if no such index was found. 
01647 */ 01648 01649 uint get_index_for_order(TABLE *table, ORDER *order, ha_rows limit) 01650 { 01651 uint idx; 01652 uint match_key= MAX_KEY, match_key_len= MAX_KEY_LENGTH + 1; 01653 ORDER *ord; 01654 01655 for (ord= order; ord; ord= ord->next) 01656 if (!ord->asc) 01657 return MAX_KEY; 01658 01659 for (idx= 0; idx < table->s->keys; idx++) 01660 { 01661 if (!(table->keys_in_use_for_query.is_set(idx))) 01662 continue; 01663 KEY_PART_INFO *keyinfo= table->key_info[idx].key_part; 01664 uint partno= 0; 01665 01666 /* 01667 The below check is sufficient considering we now have either BTREE 01668 indexes (records are returned in order for any index prefix) or HASH 01669 indexes (records are not returned in order for any index prefix). 01670 */ 01671 if (!(table->file->index_flags(idx, 0, 1) & HA_READ_ORDER)) 01672 continue; 01673 for (ord= order; ord; ord= ord->next, partno++) 01674 { 01675 Item *item= order->item[0]; 01676 if (!(item->type() == Item::FIELD_ITEM && 01677 ((Item_field*)item)->field->eq(keyinfo[partno].field))) 01678 break; 01679 } 01680 01681 if (!ord && table->key_info[idx].key_length < match_key_len) 01682 { 01683 /* 01684 Ok, the ordering is compatible and this key is shorter then 01685 previous match (we want shorter keys as we'll have to read fewer 01686 index pages for the same number of records) 01687 */ 01688 match_key= idx; 01689 match_key_len= table->key_info[idx].key_length; 01690 } 01691 } 01692 01693 if (match_key != MAX_KEY) 01694 { 01695 /* 01696 Found an index that allows records to be retrieved in the requested 01697 order. Now we'll check if using the index is cheaper then doing a table 01698 scan. 01699 */ 01700 double full_scan_time= table->file->scan_time(); 01701 double index_scan_time= table->file->read_time(match_key, 1, limit); 01702 if (index_scan_time > full_scan_time) 01703 match_key= MAX_KEY; 01704 } 01705 return match_key; 01706 } 01707 01708 01709 /* 01710 Table rows retrieval plan. 
Range optimizer creates QUICK_SELECT_I-derived 01711 objects from table read plans. 01712 */ 01713 class TABLE_READ_PLAN 01714 { 01715 public: 01716 /* 01717 Plan read cost, with or without cost of full row retrieval, depending 01718 on plan creation parameters. 01719 */ 01720 double read_cost; 01721 ha_rows records; /* estimate of #rows to be examined */ 01722 01723 /* 01724 If TRUE, the scan returns rows in rowid order. This is used only for 01725 scans that can be both ROR and non-ROR. 01726 */ 01727 bool is_ror; 01728 01729 /* 01730 Create quick select for this plan. 01731 SYNOPSIS 01732 make_quick() 01733 param Parameter from test_quick_select 01734 retrieve_full_rows If TRUE, created quick select will do full record 01735 retrieval. 01736 parent_alloc Memory pool to use, if any. 01737 01738 NOTES 01739 retrieve_full_rows is ignored by some implementations. 01740 01741 RETURN 01742 created quick select 01743 NULL on any error. 01744 */ 01745 virtual QUICK_SELECT_I *make_quick(PARAM *param, 01746 bool retrieve_full_rows, 01747 MEM_ROOT *parent_alloc=NULL) = 0; 01748 01749 /* Table read plans are allocated on MEM_ROOT and are never deleted */ 01750 static void *operator new(size_t size, MEM_ROOT *mem_root) 01751 { return (void*) alloc_root(mem_root, (uint) size); } 01752 static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); } 01753 static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ } 01754 virtual ~TABLE_READ_PLAN() {} /* Remove gcc warning */ 01755 01756 }; 01757 01758 class TRP_ROR_INTERSECT; 01759 class TRP_ROR_UNION; 01760 class TRP_INDEX_MERGE; 01761 01762 01763 /* 01764 Plan for a QUICK_RANGE_SELECT scan. 01765 TRP_RANGE::make_quick ignores retrieve_full_rows parameter because 01766 QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full 01767 record retrieval scans. 
01768 */ 01769 01770 class TRP_RANGE : public TABLE_READ_PLAN 01771 { 01772 public: 01773 SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */ 01774 uint key_idx; /* key number in PARAM::key */ 01775 01776 TRP_RANGE(SEL_ARG *key_arg, uint idx_arg) 01777 : key(key_arg), key_idx(idx_arg) 01778 {} 01779 virtual ~TRP_RANGE() {} /* Remove gcc warning */ 01780 01781 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 01782 MEM_ROOT *parent_alloc) 01783 { 01784 DBUG_ENTER("TRP_RANGE::make_quick"); 01785 QUICK_RANGE_SELECT *quick; 01786 if ((quick= get_quick_select(param, key_idx, key, parent_alloc))) 01787 { 01788 quick->records= records; 01789 quick->read_time= read_cost; 01790 } 01791 DBUG_RETURN(quick); 01792 } 01793 }; 01794 01795 01796 /* Plan for QUICK_ROR_INTERSECT_SELECT scan. */ 01797 01798 class TRP_ROR_INTERSECT : public TABLE_READ_PLAN 01799 { 01800 public: 01801 TRP_ROR_INTERSECT() {} /* Remove gcc warning */ 01802 virtual ~TRP_ROR_INTERSECT() {} /* Remove gcc warning */ 01803 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 01804 MEM_ROOT *parent_alloc); 01805 01806 /* Array of pointers to ROR range scans used in this intersection */ 01807 struct st_ror_scan_info **first_scan; 01808 struct st_ror_scan_info **last_scan; /* End of the above array */ 01809 struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */ 01810 bool is_covering; /* TRUE if no row retrieval phase is necessary */ 01811 double index_scan_costs; /* SUM(cost(index_scan)) */ 01812 }; 01813 01814 01815 /* 01816 Plan for QUICK_ROR_UNION_SELECT scan. 01817 QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows 01818 is ignored by make_quick. 
01819 */ 01820 01821 class TRP_ROR_UNION : public TABLE_READ_PLAN 01822 { 01823 public: 01824 TRP_ROR_UNION() {} /* Remove gcc warning */ 01825 virtual ~TRP_ROR_UNION() {} /* Remove gcc warning */ 01826 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 01827 MEM_ROOT *parent_alloc); 01828 TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */ 01829 TABLE_READ_PLAN **last_ror; /* end of the above array */ 01830 }; 01831 01832 01833 /* 01834 Plan for QUICK_INDEX_MERGE_SELECT scan. 01835 QUICK_ROR_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows 01836 is ignored by make_quick. 01837 */ 01838 01839 class TRP_INDEX_MERGE : public TABLE_READ_PLAN 01840 { 01841 public: 01842 TRP_INDEX_MERGE() {} /* Remove gcc warning */ 01843 virtual ~TRP_INDEX_MERGE() {} /* Remove gcc warning */ 01844 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 01845 MEM_ROOT *parent_alloc); 01846 TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */ 01847 TRP_RANGE **range_scans_end; /* end of the array */ 01848 }; 01849 01850 01851 /* 01852 Plan for a QUICK_GROUP_MIN_MAX_SELECT scan. 01853 */ 01854 01855 class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN 01856 { 01857 private: 01858 bool have_min, have_max; 01859 KEY_PART_INFO *min_max_arg_part; 01860 uint group_prefix_len; 01861 uint used_key_parts; 01862 uint group_key_parts; 01863 KEY *index_info; 01864 uint index; 01865 uint key_infix_len; 01866 byte key_infix[MAX_KEY_LENGTH]; 01867 SEL_TREE *range_tree; /* Represents all range predicates in the query. */ 01868 SEL_ARG *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */ 01869 uint param_idx; /* Index of used key in param->key. */ 01870 /* Number of records selected by the ranges in index_tree. 
*/ 01871 public: 01872 ha_rows quick_prefix_records; 01873 public: 01874 TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg, 01875 KEY_PART_INFO *min_max_arg_part_arg, 01876 uint group_prefix_len_arg, uint used_key_parts_arg, 01877 uint group_key_parts_arg, KEY *index_info_arg, 01878 uint index_arg, uint key_infix_len_arg, 01879 byte *key_infix_arg, 01880 SEL_TREE *tree_arg, SEL_ARG *index_tree_arg, 01881 uint param_idx_arg, ha_rows quick_prefix_records_arg) 01882 : have_min(have_min_arg), have_max(have_max_arg), 01883 min_max_arg_part(min_max_arg_part_arg), 01884 group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg), 01885 group_key_parts(group_key_parts_arg), index_info(index_info_arg), 01886 index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg), 01887 index_tree(index_tree_arg), param_idx(param_idx_arg), 01888 quick_prefix_records(quick_prefix_records_arg) 01889 { 01890 if (key_infix_len) 01891 memcpy(this->key_infix, key_infix_arg, key_infix_len); 01892 } 01893 virtual ~TRP_GROUP_MIN_MAX() {} /* Remove gcc warning */ 01894 01895 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, 01896 MEM_ROOT *parent_alloc); 01897 }; 01898 01899 01900 /* 01901 Fill param->needed_fields with bitmap of fields used in the query. 01902 SYNOPSIS 01903 fill_used_fields_bitmap() 01904 param Parameter from test_quick_select function. 01905 01906 NOTES 01907 Clustered PK members are not put into the bitmap as they are implicitly 01908 present in all keys (and it is impossible to avoid reading them). 01909 RETURN 01910 0 Ok 01911 1 Out of memory. 
01912 */ 01913 01914 static int fill_used_fields_bitmap(PARAM *param) 01915 { 01916 TABLE *table= param->table; 01917 my_bitmap_map *tmp; 01918 uint pk; 01919 param->fields_bitmap_size= table->s->column_bitmap_size; 01920 if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root, 01921 param->fields_bitmap_size)) || 01922 bitmap_init(¶m->needed_fields, tmp, table->s->fields, FALSE)) 01923 return 1; 01924 01925 bitmap_copy(¶m->needed_fields, table->read_set); 01926 bitmap_union(¶m->needed_fields, table->write_set); 01927 01928 pk= param->table->s->primary_key; 01929 if (pk != MAX_KEY && param->table->file->primary_key_is_clustered()) 01930 { 01931 /* The table uses clustered PK and it is not internally generated */ 01932 KEY_PART_INFO *key_part= param->table->key_info[pk].key_part; 01933 KEY_PART_INFO *key_part_end= key_part + 01934 param->table->key_info[pk].key_parts; 01935 for (;key_part != key_part_end; ++key_part) 01936 bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr-1); 01937 } 01938 return 0; 01939 } 01940 01941 01942 /* 01943 Test if a key can be used in different ranges 01944 01945 SYNOPSIS 01946 SQL_SELECT::test_quick_select() 01947 thd Current thread 01948 keys_to_use Keys to use for range retrieval 01949 prev_tables Tables assumed to be already read when the scan is 01950 performed (but not read at the moment of this call) 01951 limit Query limit 01952 force_quick_range Prefer to use range (instead of full table scan) even 01953 if it is more expensive. 01954 01955 NOTES 01956 Updates the following in the select parameter: 01957 needed_reg - Bits for keys with may be used if all prev regs are read 01958 quick - Parameter to use when reading records. 

    In the table struct the following information is updated:
      quick_keys           - Which keys can be used
      quick_rows           - How many rows the key matches
      quick_condition_rows - E(# rows that will satisfy the table condition)

  IMPLEMENTATION
    quick_condition_rows value is obtained as follows:

      It is a minimum of E(#output rows) for all considered table access
      methods (range and index_merge accesses over various indexes).

    The obtained value is not a true E(#rows that satisfy table condition)
    but rather a pessimistic estimate. To obtain a true E(#...) one would
    need to combine estimates of various access methods, taking into account
    correlations between sets of rows they will return.

    For example, if values of tbl.key1 and tbl.key2 are independent (a right
    assumption if we have no information about their correlation) then the
    correct estimate will be:

      E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
      = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2))

    which is smaller than

      MIN(E(#rows(tbl.key1 < c1)), E(#rows(tbl.key2 < c2)))

    which is currently produced.

  TODO
   * Change the value returned in quick_condition_rows from a pessimistic
     estimate to true E(#rows that satisfy table condition).
     (we can re-use some of the E(#rows) calculation code from
      index_merge/intersection for this)

   * Check if this function really needs to modify keys_to_use, and change the
     code to pass it by reference if it doesn't.

   * In addition to force_quick_range other means can be (and usually are) used
     to make this function prefer range over full table scan. Figure out if
     force_quick_range is really needed.

  RETURN
   -1 if impossible select (i.e.
certainly no rows will be selected) 02004 0 if can't use quick_select 02005 1 if found usable ranges and quick select has been successfully created. 02006 */ 02007 02008 int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, 02009 table_map prev_tables, 02010 ha_rows limit, bool force_quick_range) 02011 { 02012 uint idx; 02013 double scan_time; 02014 DBUG_ENTER("SQL_SELECT::test_quick_select"); 02015 DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", 02016 keys_to_use.to_ulonglong(), (ulong) prev_tables, 02017 (ulong) const_tables)); 02018 DBUG_PRINT("info", ("records: %lu", (ulong) head->file->stats.records)); 02019 delete quick; 02020 quick=0; 02021 needed_reg.clear_all(); 02022 quick_keys.clear_all(); 02023 if ((specialflag & SPECIAL_SAFE_MODE) && ! force_quick_range || 02024 !limit) 02025 DBUG_RETURN(0); /* purecov: inspected */ 02026 if (keys_to_use.is_clear_all()) 02027 DBUG_RETURN(0); 02028 records= head->file->stats.records; 02029 if (!records) 02030 records++; /* purecov: inspected */ 02031 scan_time= (double) records / TIME_FOR_COMPARE + 1; 02032 read_time= (double) head->file->scan_time() + scan_time + 1.1; 02033 if (head->force_index) 02034 scan_time= read_time= DBL_MAX; 02035 if (limit < records) 02036 read_time= (double) records + scan_time + 1; // Force to use index 02037 else if (read_time <= 2.0 && !force_quick_range) 02038 DBUG_RETURN(0); /* No need for quick select */ 02039 02040 DBUG_PRINT("info",("Time to scan table: %g", read_time)); 02041 02042 keys_to_use.intersect(head->keys_in_use_for_query); 02043 if (!keys_to_use.is_clear_all()) 02044 { 02045 MEM_ROOT alloc; 02046 SEL_TREE *tree= NULL; 02047 KEY_PART *key_parts; 02048 KEY *key_info; 02049 PARAM param; 02050 02051 /* set up parameter that is passed to all functions */ 02052 param.thd= thd; 02053 param.baseflag=head->file->ha_table_flags(); 02054 param.prev_tables=prev_tables | const_tables; 02055 param.read_tables=read_tables; 02056 param.current_table= 
head->map; 02057 param.table=head; 02058 param.keys=0; 02059 param.mem_root= &alloc; 02060 param.old_root= thd->mem_root; 02061 param.needed_reg= &needed_reg; 02062 param.imerge_cost_buff_size= 0; 02063 param.using_real_indexes= TRUE; 02064 param.remove_jump_scans= TRUE; 02065 02066 thd->no_errors=1; // Don't warn about NULL 02067 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); 02068 if (!(param.key_parts= (KEY_PART*) alloc_root(&alloc, 02069 sizeof(KEY_PART)* 02070 head->s->key_parts)) || 02071 fill_used_fields_bitmap(¶m)) 02072 { 02073 thd->no_errors=0; 02074 free_root(&alloc,MYF(0)); // Return memory & allocator 02075 DBUG_RETURN(0); // Can't use range 02076 } 02077 key_parts= param.key_parts; 02078 thd->mem_root= &alloc; 02079 02080 /* 02081 Make an array with description of all key parts of all table keys. 02082 This is used in get_mm_parts function. 02083 */ 02084 key_info= head->key_info; 02085 for (idx=0 ; idx < head->s->keys ; idx++, key_info++) 02086 { 02087 KEY_PART_INFO *key_part_info; 02088 if (!keys_to_use.is_set(idx)) 02089 continue; 02090 if (key_info->flags & HA_FULLTEXT) 02091 continue; // ToDo: ft-keys in non-ft ranges, if possible SerG 02092 02093 param.key[param.keys]=key_parts; 02094 key_part_info= key_info->key_part; 02095 for (uint part=0 ; part < key_info->key_parts ; 02096 part++, key_parts++, key_part_info++) 02097 { 02098 key_parts->key= param.keys; 02099 key_parts->part= part; 02100 key_parts->length= key_part_info->length; 02101 key_parts->store_length= key_part_info->store_length; 02102 key_parts->field= key_part_info->field; 02103 key_parts->null_bit= key_part_info->null_bit; 02104 key_parts->image_type = 02105 (key_info->flags & HA_SPATIAL) ? 
Field::itMBR : Field::itRAW; 02106 } 02107 param.real_keynr[param.keys++]=idx; 02108 } 02109 param.key_parts_end=key_parts; 02110 02111 /* Calculate cost of full index read for the shortest covering index */ 02112 if (!head->used_keys.is_clear_all()) 02113 { 02114 int key_for_use= find_shortest_key(head, &head->used_keys); 02115 double key_read_time= (get_index_only_read_time(¶m, records, 02116 key_for_use) + 02117 (double) records / TIME_FOR_COMPARE); 02118 DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " 02119 "read time %g", key_for_use, key_read_time)); 02120 if (key_read_time < read_time) 02121 read_time= key_read_time; 02122 } 02123 02124 TABLE_READ_PLAN *best_trp= NULL; 02125 TRP_GROUP_MIN_MAX *group_trp; 02126 double best_read_time= read_time; 02127 02128 if (cond) 02129 { 02130 if ((tree= get_mm_tree(¶m,cond))) 02131 { 02132 if (tree->type == SEL_TREE::IMPOSSIBLE) 02133 { 02134 records=0L; /* Return -1 from this function. */ 02135 read_time= (double) HA_POS_ERROR; 02136 goto free_mem; 02137 } 02138 /* 02139 If the tree can't be used for range scans, proceed anyway, as we 02140 can construct a group-min-max quick select 02141 */ 02142 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) 02143 tree= NULL; 02144 } 02145 } 02146 02147 /* 02148 Try to construct a QUICK_GROUP_MIN_MAX_SELECT. 02149 Notice that it can be constructed no matter if there is a range tree. 02150 */ 02151 group_trp= get_best_group_min_max(¶m, tree); 02152 if (group_trp) 02153 { 02154 param.table->quick_condition_rows= min(group_trp->records, 02155 head->file->stats.records); 02156 if (group_trp->read_cost < best_read_time) 02157 { 02158 best_trp= group_trp; 02159 best_read_time= best_trp->read_cost; 02160 } 02161 } 02162 02163 if (tree) 02164 { 02165 /* 02166 It is possible to use a range-based quick select (but it might be 02167 slower than 'all' table scan). 
02168 */ 02169 if (tree->merges.is_empty()) 02170 { 02171 TRP_RANGE *range_trp; 02172 TRP_ROR_INTERSECT *rori_trp; 02173 bool can_build_covering= FALSE; 02174 02175 /* Get best 'range' plan and prepare data for making other plans */ 02176 if ((range_trp= get_key_scans_params(¶m, tree, FALSE, TRUE, 02177 best_read_time))) 02178 { 02179 best_trp= range_trp; 02180 best_read_time= best_trp->read_cost; 02181 } 02182 02183 /* 02184 Simultaneous key scans and row deletes on several handler 02185 objects are not allowed so don't use ROR-intersection for 02186 table deletes. 02187 */ 02188 if ((thd->lex->sql_command != SQLCOM_DELETE)) 02189 #ifdef NOT_USED 02190 if ((thd->lex->sql_command != SQLCOM_UPDATE)) 02191 #endif 02192 { 02193 /* 02194 Get best non-covering ROR-intersection plan and prepare data for 02195 building covering ROR-intersection. 02196 */ 02197 if ((rori_trp= get_best_ror_intersect(¶m, tree, best_read_time, 02198 &can_build_covering))) 02199 { 02200 best_trp= rori_trp; 02201 best_read_time= best_trp->read_cost; 02202 /* 02203 Try constructing covering ROR-intersect only if it looks possible 02204 and worth doing. 02205 */ 02206 if (!rori_trp->is_covering && can_build_covering && 02207 (rori_trp= get_best_covering_ror_intersect(¶m, tree, 02208 best_read_time))) 02209 best_trp= rori_trp; 02210 } 02211 } 02212 } 02213 else 02214 { 02215 /* Try creating index_merge/ROR-union scan. 
*/ 02216 SEL_IMERGE *imerge; 02217 TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp; 02218 LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ 02219 DBUG_PRINT("info",("No range reads possible," 02220 " trying to construct index_merge")); 02221 List_iterator_fast<SEL_IMERGE> it(tree->merges); 02222 while ((imerge= it++)) 02223 { 02224 new_conj_trp= get_best_disjunct_quick(¶m, imerge, best_read_time); 02225 if (new_conj_trp) 02226 set_if_smaller(param.table->quick_condition_rows, 02227 new_conj_trp->records); 02228 if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost < 02229 best_conj_trp->read_cost)) 02230 best_conj_trp= new_conj_trp; 02231 } 02232 if (best_conj_trp) 02233 best_trp= best_conj_trp; 02234 } 02235 } 02236 02237 thd->mem_root= param.old_root; 02238 02239 /* If we got a read plan, create a quick select from it. */ 02240 if (best_trp) 02241 { 02242 records= best_trp->records; 02243 if (!(quick= best_trp->make_quick(¶m, TRUE)) || quick->init()) 02244 { 02245 delete quick; 02246 quick= NULL; 02247 } 02248 } 02249 02250 free_mem: 02251 free_root(&alloc,MYF(0)); // Return memory & allocator 02252 thd->mem_root= param.old_root; 02253 thd->no_errors=0; 02254 } 02255 02256 DBUG_EXECUTE("info", print_quick(quick, &needed_reg);); 02257 02258 /* 02259 Assume that if the user is using 'limit' we will only need to scan 02260 limit rows if we are using a key 02261 */ 02262 DBUG_RETURN(records ? test(quick) : -1); 02263 } 02264 02265 /**************************************************************************** 02266 * Partition pruning module 02267 ****************************************************************************/ 02268 #ifdef WITH_PARTITION_STORAGE_ENGINE 02269 02270 /* 02271 PartitionPruningModule 02272 02273 This part of the code does partition pruning. Partition pruning solves the 02274 following problem: given a query over partitioned tables, find partitions 02275 that we will not need to access (i.e. 
partitions that we can assume to be
  empty) when executing the query.
  The set of partitions to prune doesn't depend on which query execution
  plan will be used to execute the query.

  HOW IT WORKS

  Partition pruning module makes use of RangeAnalysisModule. The following
  examples show how the problem of partition pruning can be reduced to the
  range analysis problem:

  EXAMPLE 1
    Consider a query:

      SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'

    where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent
    way to find the used (i.e. not pruned away) partitions is as follows:

    1. analyze the WHERE clause and extract the list of intervals over t1.a
       for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}

    2. for each interval I
       {
         find partitions that have non-empty intersection with I;
         mark them as used;
       }

  EXAMPLE 2
    Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
    we need to:

    1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
       The list of intervals we'll obtain will look like this:
       ((t1.a, t1.b) = (1,'foo')),
       ((t1.a, t1.b) = (2,'bar')),
       ((t1.a, t1.b) > (10,'zz'))

    2. for each interval I
       {
         if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
         {
           calculate HASH(part_func(t1.a, t1.b));
           find which partition has records with this hash value and mark
           it as used;
         }
         else
         {
           mark all partitions as used;
           break;
         }
       }

   For both examples the step #1 is exactly what RangeAnalysisModule could
   be used to do, if it was provided with appropriate index description
   (array of KEY_PART structures).
In example #1, we need to provide it with description of index(t1.a),
   in example #2, we need to provide it with description of index(t1.a, t1.b).

   These index descriptions are further called "partitioning index
   descriptions". Note that it doesn't matter if such indexes really exist,
   as range analysis module only uses the description.

   Putting it all together, partitioning module works as follows:

   prune_partitions() {
     call create_partition_index_description();

     call get_mm_tree(); // invoke the RangeAnalysisModule

     // analyze the obtained interval list and get used partitions
     call find_used_partitions();
   }

*/

struct st_part_prune_param;
struct st_part_opt_info;

/* Callback that marks one partition (given by its id) as used */
typedef void (*mark_full_part_func)(partition_info*, uint32);

/*
  Partition pruning operation context
*/
typedef struct st_part_prune_param
{
  RANGE_OPT_PARAM range_param; /* Range analyzer parameters */

  /***************************************************************
   Following fields are filled in based solely on partitioning
   definition and not modified after that:
   **************************************************************/
  partition_info *part_info; /* Copy of table->part_info */
  /* Function to get partition id from partitioning fields only */
  get_part_id_func get_top_partition_id_func;
  /* Function to mark a partition as used (w/all subpartitions if they exist)*/
  mark_full_part_func mark_full_partition_used;

  /* Partitioning 'index' description, array of key parts */
  KEY_PART *key;

  /*
    Number of fields in partitioning 'index' definition created for
    partitioning (0 if partitioning 'index' doesn't include partitioning
    fields)
  */
  uint part_fields;
  uint subpart_fields; /* Same as above for subpartitioning */

  /*
    Number of the last partitioning field keypart in the index, or -1 if
    partitioning index definition doesn't include partitioning fields.
  */
  int last_part_partno;
  int last_subpart_partno; /* Same as above for subpartitioning */

  /*
    is_part_keypart[i] == test(keypart #i in partitioning index is a member
                               used in partitioning)
    Used to maintain current values of cur_part_fields and cur_subpart_fields
  */
  my_bool *is_part_keypart;
  /* Same as above for subpartitioning */
  my_bool *is_subpart_keypart;

  /***************************************************************
   Following fields form find_used_partitions() recursion context:
   **************************************************************/
  SEL_ARG **arg_stack;     /* "Stack" of SEL_ARGs */
  SEL_ARG **arg_stack_end; /* Top of the stack */
  /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
  uint cur_part_fields;
  /* Same as cur_part_fields, but for subpartitioning */
  uint cur_subpart_fields;

  /* Iterator to be used to obtain the "current" set of used partitions */
  PARTITION_ITERATOR part_iter;

  /* Initialized bitmap of no_subparts size */
  MY_BITMAP subparts_bitmap;
} PART_PRUNE_PARAM;

static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
                                       SEL_IMERGE *imerge);
static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
                                            List<SEL_IMERGE> &merges);
static void mark_all_partitions_as_used(partition_info *part_info);
static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par,
                                        uint32 num);

/* Debug-only printing helpers */
#ifndef DBUG_OFF
static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
static void dbug_print_field(Field *field);
static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
#endif


/*
  Perform partition pruning for a given table and condition.

  SYNOPSIS
    prune_partitions()
      thd           Thread handle
      table         Table to perform partition pruning for
      pprune_cond   Condition to use for partition pruning

  DESCRIPTION
    This function assumes that all partitions are marked as unused when it
    is invoked. The function analyzes the condition, finds partitions that
    need to be used to retrieve the records that match the condition, and
    marks them as used by setting appropriate bit in part_info->used_partitions.
    In the worst case all partitions are marked as used.

  NOTE
    This function returns promptly if called for non-partitioned table.

  RETURN
    TRUE   We've inferred that no partitions need to be used (i.e.
no table 02456 records will satisfy pprune_cond) 02457 FALSE Otherwise 02458 */ 02459 02460 bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) 02461 { 02462 bool retval= FALSE; 02463 partition_info *part_info = table->part_info; 02464 DBUG_ENTER("prune_partitions"); 02465 02466 if (!part_info) 02467 DBUG_RETURN(FALSE); /* not a partitioned table */ 02468 02469 if (!pprune_cond) 02470 { 02471 mark_all_partitions_as_used(part_info); 02472 DBUG_RETURN(FALSE); 02473 } 02474 02475 PART_PRUNE_PARAM prune_param; 02476 MEM_ROOT alloc; 02477 RANGE_OPT_PARAM *range_par= &prune_param.range_param; 02478 my_bitmap_map *old_read_set, *old_write_set; 02479 02480 prune_param.part_info= part_info; 02481 init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); 02482 range_par->mem_root= &alloc; 02483 range_par->old_root= thd->mem_root; 02484 02485 if (create_partition_index_description(&prune_param)) 02486 { 02487 mark_all_partitions_as_used(part_info); 02488 free_root(&alloc,MYF(0)); // Return memory & allocator 02489 DBUG_RETURN(FALSE); 02490 } 02491 02492 old_write_set= dbug_tmp_use_all_columns(table, table->write_set); 02493 old_read_set= dbug_tmp_use_all_columns(table, table->read_set); 02494 range_par->thd= thd; 02495 range_par->table= table; 02496 /* range_par->cond doesn't need initialization */ 02497 range_par->prev_tables= range_par->read_tables= 0; 02498 range_par->current_table= table->map; 02499 02500 range_par->keys= 1; // one index 02501 range_par->using_real_indexes= FALSE; 02502 range_par->remove_jump_scans= FALSE; 02503 range_par->real_keynr[0]= 0; 02504 02505 thd->no_errors=1; // Don't warn about NULL 02506 thd->mem_root=&alloc; 02507 02508 bitmap_clear_all(&part_info->used_partitions); 02509 02510 prune_param.key= prune_param.range_param.key_parts; 02511 SEL_TREE *tree; 02512 SEL_ARG *arg; 02513 int res; 02514 02515 tree= get_mm_tree(range_par, pprune_cond); 02516 if (!tree) 02517 goto all_used; 02518 02519 if (tree->type == 
SEL_TREE::IMPOSSIBLE) 02520 { 02521 retval= TRUE; 02522 goto end; 02523 } 02524 02525 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) 02526 goto all_used; 02527 02528 if (tree->merges.is_empty()) 02529 { 02530 /* Range analysis has produced a single list of intervals. */ 02531 prune_param.arg_stack_end= prune_param.arg_stack; 02532 prune_param.cur_part_fields= 0; 02533 prune_param.cur_subpart_fields= 0; 02534 init_all_partitions_iterator(part_info, &prune_param.part_iter); 02535 if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param, 02536 tree->keys[0])))) 02537 goto all_used; 02538 } 02539 else 02540 { 02541 if (tree->merges.elements == 1) 02542 { 02543 /* 02544 Range analysis has produced a "merge" of several intervals lists, a 02545 SEL_TREE that represents an expression in form 02546 sel_imerge = (tree1 OR tree2 OR ... OR treeN) 02547 that cannot be reduced to one tree. This can only happen when 02548 partitioning index has several keyparts and the condition is OR of 02549 conditions that refer to different key parts. For example, we'll get 02550 here for "partitioning_field=const1 OR subpartitioning_field=const2" 02551 */ 02552 if (-1 == (res= find_used_partitions_imerge(&prune_param, 02553 tree->merges.head()))) 02554 goto all_used; 02555 } 02556 else 02557 { 02558 /* 02559 Range analysis has produced a list of several imerges, i.e. a 02560 structure that represents a condition in form 02561 imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN) 02562 This is produced for complicated WHERE clauses that range analyzer 02563 can't really analyze properly. 
02564 */ 02565 if (-1 == (res= find_used_partitions_imerge_list(&prune_param, 02566 tree->merges))) 02567 goto all_used; 02568 } 02569 } 02570 02571 /* 02572 res == 0 => no used partitions => retval=TRUE 02573 res == 1 => some used partitions => retval=FALSE 02574 res == -1 - we jump over this line to all_used: 02575 */ 02576 retval= test(!res); 02577 goto end; 02578 02579 all_used: 02580 retval= FALSE; // some partitions are used 02581 mark_all_partitions_as_used(prune_param.part_info); 02582 end: 02583 dbug_tmp_restore_column_map(table->write_set, old_write_set); 02584 dbug_tmp_restore_column_map(table->read_set, old_read_set); 02585 thd->no_errors=0; 02586 thd->mem_root= range_par->old_root; 02587 free_root(&alloc,MYF(0)); // Return memory & allocator 02588 DBUG_RETURN(retval); 02589 } 02590 02591 02592 /* 02593 Store field key image to table record 02594 02595 SYNOPSIS 02596 store_key_image_to_rec() 02597 field Field which key image should be stored 02598 ptr Field value in key format 02599 len Length of the value, in bytes 02600 02601 DESCRIPTION 02602 Copy the field value from its key image to the table record. The source 02603 is the value in key image format, occupying len bytes in buffer pointed 02604 by ptr. The destination is table record, in "field value in table record" 02605 format. 
02606 */ 02607 02608 void store_key_image_to_rec(Field *field, char *ptr, uint len) 02609 { 02610 /* Do the same as print_key() does */ 02611 my_bitmap_map *old_map; 02612 02613 if (field->real_maybe_null()) 02614 { 02615 if (*ptr) 02616 { 02617 field->set_null(); 02618 return; 02619 } 02620 field->set_notnull(); 02621 ptr++; 02622 } 02623 old_map= dbug_tmp_use_all_columns(field->table, 02624 field->table->write_set); 02625 field->set_key_image(ptr, len); 02626 dbug_tmp_restore_column_map(field->table->write_set, old_map); 02627 } 02628 02629 02630 /* 02631 For SEL_ARG* array, store sel_arg->min values into table record buffer 02632 02633 SYNOPSIS 02634 store_selargs_to_rec() 02635 ppar Partition pruning context 02636 start Array of SEL_ARG* for which the minimum values should be stored 02637 num Number of elements in the array 02638 02639 DESCRIPTION 02640 For each SEL_ARG* interval in the specified array, store the left edge 02641 field value (sel_arg->min, key image format) into the table record. 
02642 */ 02643 02644 static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start, 02645 int num) 02646 { 02647 KEY_PART *parts= ppar->range_param.key_parts; 02648 for (SEL_ARG **end= start + num; start != end; start++) 02649 { 02650 SEL_ARG *sel_arg= (*start); 02651 store_key_image_to_rec(sel_arg->field, sel_arg->min_value, 02652 parts[sel_arg->part].length); 02653 } 02654 } 02655 02656 02657 /* Mark a partition as used in the case when there are no subpartitions */ 02658 static void mark_full_partition_used_no_parts(partition_info* part_info, 02659 uint32 part_id) 02660 { 02661 DBUG_ENTER("mark_full_partition_used_no_parts"); 02662 DBUG_PRINT("enter", ("Mark partition %u as used", part_id)); 02663 bitmap_set_bit(&part_info->used_partitions, part_id); 02664 DBUG_VOID_RETURN; 02665 } 02666 02667 02668 /* Mark a partition as used in the case when there are subpartitions */ 02669 static void mark_full_partition_used_with_parts(partition_info *part_info, 02670 uint32 part_id) 02671 { 02672 uint32 start= part_id * part_info->no_subparts; 02673 uint32 end= start + part_info->no_subparts; 02674 DBUG_ENTER("mark_full_partition_used_with_parts"); 02675 02676 for (; start != end; start++) 02677 { 02678 DBUG_PRINT("info", ("1:Mark subpartition %u as used", start)); 02679 bitmap_set_bit(&part_info->used_partitions, start); 02680 } 02681 DBUG_VOID_RETURN; 02682 } 02683 02684 /* 02685 Find the set of used partitions for List<SEL_IMERGE> 02686 SYNOPSIS 02687 find_used_partitions_imerge_list 02688 ppar Partition pruning context. 02689 key_tree Intervals tree to perform pruning for. 02690 02691 DESCRIPTION 02692 List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...". 02693 The set of used partitions is an intersection of used partitions sets 02694 for imerge_{i}. 02695 We accumulate this intersection in a separate bitmap. 
02696 02697 RETURN 02698 See find_used_partitions() 02699 */ 02700 02701 static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, 02702 List<SEL_IMERGE> &merges) 02703 { 02704 MY_BITMAP all_merges; 02705 uint bitmap_bytes; 02706 my_bitmap_map *bitmap_buf; 02707 uint n_bits= ppar->part_info->used_partitions.n_bits; 02708 bitmap_bytes= bitmap_buffer_size(n_bits); 02709 if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root, 02710 bitmap_bytes))) 02711 { 02712 /* 02713 Fallback, process just the first SEL_IMERGE. This can leave us with more 02714 partitions marked as used then actually needed. 02715 */ 02716 return find_used_partitions_imerge(ppar, merges.head()); 02717 } 02718 bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE); 02719 bitmap_set_prefix(&all_merges, n_bits); 02720 02721 List_iterator<SEL_IMERGE> it(merges); 02722 SEL_IMERGE *imerge; 02723 while ((imerge=it++)) 02724 { 02725 int res= find_used_partitions_imerge(ppar, imerge); 02726 if (!res) 02727 { 02728 /* no used partitions on one ANDed imerge => no used partitions at all */ 02729 return 0; 02730 } 02731 02732 if (res != -1) 02733 bitmap_intersect(&all_merges, &ppar->part_info->used_partitions); 02734 02735 if (bitmap_is_clear_all(&all_merges)) 02736 return 0; 02737 02738 bitmap_clear_all(&ppar->part_info->used_partitions); 02739 } 02740 memcpy(ppar->part_info->used_partitions.bitmap, all_merges.bitmap, 02741 bitmap_bytes); 02742 return 1; 02743 } 02744 02745 02746 /* 02747 Find the set of used partitions for SEL_IMERGE structure 02748 SYNOPSIS 02749 find_used_partitions_imerge() 02750 ppar Partition pruning context. 02751 key_tree Intervals tree to perform pruning for. 02752 02753 DESCRIPTION 02754 SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is 02755 trivial - just use mark used partitions for each tree and bail out early 02756 if for some tree_{i} all partitions are used. 02757 02758 RETURN 02759 See find_used_partitions(). 
02760 */ 02761 02762 static 02763 int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge) 02764 { 02765 int res= 0; 02766 for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++) 02767 { 02768 ppar->arg_stack_end= ppar->arg_stack; 02769 ppar->cur_part_fields= 0; 02770 ppar->cur_subpart_fields= 0; 02771 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); 02772 SEL_ARG *key_tree= (*ptree)->keys[0]; 02773 if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree)))) 02774 return -1; 02775 } 02776 return res; 02777 } 02778 02779 02780 /* 02781 Collect partitioning ranges for the SEL_ARG tree and mark partitions as used 02782 02783 SYNOPSIS 02784 find_used_partitions() 02785 ppar Partition pruning context. 02786 key_tree SEL_ARG range tree to perform pruning for 02787 02788 DESCRIPTION 02789 This function 02790 * recursively walks the SEL_ARG* tree collecting partitioning "intervals" 02791 * finds the partitions one needs to use to get rows in these intervals 02792 * marks these partitions as used. 02793 The next session desribes the process in greater detail. 02794 02795 IMPLEMENTATION 02796 TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR 02797 We can find out which [sub]partitions to use if we obtain restrictions on 02798 [sub]partitioning fields in the following form: 02799 1. "partition_field1=const1 AND ... AND partition_fieldN=constN" 02800 1.1 Same as (1) but for subpartition fields 02801 02802 If partitioning supports interval analysis (i.e. partitioning is a 02803 function of a single table field, and partition_info:: 02804 get_part_iter_for_interval != NULL), then we can also use condition in 02805 this form: 02806 2. "const1 <=? partition_field <=? 
const2"
    2.1  Same as (2) but for subpartition_field

    INFERRING THE RESTRICTIONS FROM SEL_ARG TREE

    The below is an example of what SEL_ARG tree may represent:

    (start)
     |                           $
     |   Partitioning keyparts   $  subpartitioning keyparts
     |                           $
     |     ...          ...      $
     |      |            |       $
     | +---------+  +---------+  $  +-----------+  +-----------+
     \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
       +---------+  +---------+  $  +-----------+  +-----------+
            |                    $        |              |
            |                    $        |        +-----------+
            |                    $        |        | subpar2=c6|
            |                    $        |        +-----------+
            |                    $        |
            |                    $  +-----------+  +-----------+
            |                    $  | subpar1=c4|--| subpar2=c8|
            |                    $  +-----------+  +-----------+
            |                    $
            |                    $
       +---------+               $  +------------+  +------------+
       | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
       +---------+               $  +------------+  +------------+
            |                    $
           ...                   $

    The up-down connections are connections via SEL_ARG::left and
    SEL_ARG::right. A horizontal connection to the right is the
    SEL_ARG::next_key_part connection.

    find_used_partitions() traverses the entire tree via recursion on
     * SEL_ARG::next_key_part (from left to right on the picture)
     * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
       performed for each depth level.

    Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
    ppar->arg_stack) constraints on partitioning and subpartitioning fields.
    For the example in the above picture, one of stack states is:
      in find_used_partitions(key_tree = "subpar2=c5") (***)
      in find_used_partitions(key_tree = "subpar1=c3")
      in find_used_partitions(key_tree = "par2=c2") (**)
      in find_used_partitions(key_tree = "par1=c1")
      in prune_partitions(...)
    We apply partitioning limits as soon as possible, e.g.
when we reach the
    depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
    and save them in ppar->part_iter.
    When we reach the depth (***), we find which subpartition(s) correspond to
    "subpar1=c3 AND subpar2=c5", and then mark appropriate subpartitions in
    appropriate partitions as used.

    It is possible that constraints on some partitioning fields are missing.
    For the above example, consider this stack state:
      in find_used_partitions(key_tree = "subpar2=c12") (***)
      in find_used_partitions(key_tree = "subpar1=c10")
      in find_used_partitions(key_tree = "par1=c2")
      in prune_partitions(...)
    Here we don't have constraints for all partitioning fields. Since we've
    never set the ppar->part_iter to contain used set of partitions, we use
    its default "all partitions" value. We get subpartition id for
    "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in every
    partition.

    The inverse is also possible: we may get constraints on partitioning
    fields, but not constraints on subpartitioning fields. In that case,
    calls to find_used_partitions() with depth below (**) will return -1,
    and we will mark entire partition as used.

  TODO
    Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop

  RETURN
    1   OK, one or more [sub]partitions are marked as used.
    0   The passed condition doesn't match any partitions
   -1   Couldn't infer any partition pruning "intervals" from the passed
        SEL_ARG* tree (which means that all partitions should be marked as
        used). Marking partitions as used is the responsibility of the caller.
02888 */ 02889 02890 static 02891 int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) 02892 { 02893 int res, left_res=0, right_res=0; 02894 int partno= (int)key_tree->part; 02895 bool pushed= FALSE; 02896 bool set_full_part_if_bad_ret= FALSE; 02897 02898 if (key_tree->left != &null_element) 02899 { 02900 if (-1 == (left_res= find_used_partitions(ppar,key_tree->left))) 02901 return -1; 02902 } 02903 02904 if (key_tree->type == SEL_ARG::KEY_RANGE) 02905 { 02906 if (partno == 0 && (NULL != ppar->part_info->get_part_iter_for_interval)) 02907 { 02908 /* 02909 Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and 02910 we got "const1 CMP fieldX CMP const2" interval <-- psergey-todo: change 02911 */ 02912 DBUG_EXECUTE("info", dbug_print_segment_range(key_tree, 02913 ppar->range_param. 02914 key_parts);); 02915 res= ppar->part_info-> 02916 get_part_iter_for_interval(ppar->part_info, 02917 FALSE, 02918 key_tree->min_value, 02919 key_tree->max_value, 02920 key_tree->min_flag | key_tree->max_flag, 02921 &ppar->part_iter); 02922 if (!res) 02923 goto go_right; /* res==0 --> no satisfying partitions */ 02924 if (res == -1) 02925 { 02926 //get a full range iterator 02927 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); 02928 } 02929 /* 02930 Save our intent to mark full partition as used if we will not be able 02931 to obtain further limits on subpartitions 02932 */ 02933 set_full_part_if_bad_ret= TRUE; 02934 goto process_next_key_part; 02935 } 02936 02937 if (partno == ppar->last_subpart_partno && 02938 (NULL != ppar->part_info->get_subpart_iter_for_interval)) 02939 { 02940 PARTITION_ITERATOR subpart_iter; 02941 DBUG_EXECUTE("info", dbug_print_segment_range(key_tree, 02942 ppar->range_param. 
02943 key_parts);); 02944 res= ppar->part_info-> 02945 get_subpart_iter_for_interval(ppar->part_info, 02946 TRUE, 02947 key_tree->min_value, 02948 key_tree->max_value, 02949 key_tree->min_flag | key_tree->max_flag, 02950 &subpart_iter); 02951 DBUG_ASSERT(res); /* We can't get "no satisfying subpartitions" */ 02952 if (res == -1) 02953 return -1; /* all subpartitions satisfy */ 02954 02955 uint32 subpart_id; 02956 bitmap_clear_all(&ppar->subparts_bitmap); 02957 while ((subpart_id= subpart_iter.get_next(&subpart_iter)) != 02958 NOT_A_PARTITION_ID) 02959 bitmap_set_bit(&ppar->subparts_bitmap, subpart_id); 02960 02961 /* Mark each partition as used in each subpartition. */ 02962 uint32 part_id; 02963 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != 02964 NOT_A_PARTITION_ID) 02965 { 02966 for (uint i= 0; i < ppar->part_info->no_subparts; i++) 02967 if (bitmap_is_set(&ppar->subparts_bitmap, i)) 02968 bitmap_set_bit(&ppar->part_info->used_partitions, 02969 part_id * ppar->part_info->no_subparts + i); 02970 } 02971 goto go_right; 02972 } 02973 02974 if (key_tree->is_singlepoint()) 02975 { 02976 pushed= TRUE; 02977 ppar->cur_part_fields+= ppar->is_part_keypart[partno]; 02978 ppar->cur_subpart_fields+= ppar->is_subpart_keypart[partno]; 02979 *(ppar->arg_stack_end++) = key_tree; 02980 02981 if (partno == ppar->last_part_partno && 02982 ppar->cur_part_fields == ppar->part_fields) 02983 { 02984 /* 02985 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning 02986 fields. Save all constN constants into table record buffer. 
02987 */ 02988 store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields); 02989 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack, 02990 ppar->part_fields);); 02991 uint32 part_id; 02992 longlong func_value; 02993 /* Find in which partition the {const1, ...,constN} tuple goes */ 02994 if (ppar->get_top_partition_id_func(ppar->part_info, &part_id, 02995 &func_value)) 02996 { 02997 res= 0; /* No satisfying partitions */ 02998 goto pop_and_go_right; 02999 } 03000 /* Rembember the limit we got - single partition #part_id */ 03001 init_single_partition_iterator(part_id, &ppar->part_iter); 03002 03003 /* 03004 If there are no subpartitions/we fail to get any limit for them, 03005 then we'll mark full partition as used. 03006 */ 03007 set_full_part_if_bad_ret= TRUE; 03008 goto process_next_key_part; 03009 } 03010 03011 if (partno == ppar->last_subpart_partno && 03012 ppar->cur_subpart_fields == ppar->subpart_fields) 03013 { 03014 /* 03015 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning 03016 fields. Save all constN constants into table record buffer. 03017 */ 03018 store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields, 03019 ppar->subpart_fields); 03020 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end- 03021 ppar->subpart_fields, 03022 ppar->subpart_fields);); 03023 /* Find the subpartition (it's HASH/KEY so we always have one) */ 03024 partition_info *part_info= ppar->part_info; 03025 uint32 subpart_id= part_info->get_subpartition_id(part_info); 03026 03027 /* Mark this partition as used in each subpartition. 
*/ 03028 uint32 part_id; 03029 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != 03030 NOT_A_PARTITION_ID) 03031 { 03032 bitmap_set_bit(&part_info->used_partitions, 03033 part_id * part_info->no_subparts + subpart_id); 03034 } 03035 res= 1; /* Some partitions were marked as used */ 03036 goto pop_and_go_right; 03037 } 03038 } 03039 else 03040 { 03041 /* 03042 Can't handle condition on current key part. If we're that deep that 03043 we're processing subpartititoning's key parts, this means we'll not be 03044 able to infer any suitable condition, so bail out. 03045 */ 03046 if (partno >= ppar->last_part_partno) 03047 return -1; 03048 } 03049 } 03050 03051 process_next_key_part: 03052 if (key_tree->next_key_part) 03053 res= find_used_partitions(ppar, key_tree->next_key_part); 03054 else 03055 res= -1; 03056 03057 if (set_full_part_if_bad_ret) 03058 { 03059 if (res == -1) 03060 { 03061 /* Got "full range" for subpartitioning fields */ 03062 uint32 part_id; 03063 bool found= FALSE; 03064 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != 03065 NOT_A_PARTITION_ID) 03066 { 03067 ppar->mark_full_partition_used(ppar->part_info, part_id); 03068 found= TRUE; 03069 } 03070 res= test(found); 03071 } 03072 /* 03073 Restore the "used partitions iterator" to the default setting that 03074 specifies iteration over all partitions. 
03075 */ 03076 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); 03077 } 03078 03079 if (pushed) 03080 { 03081 pop_and_go_right: 03082 /* Pop this key part info off the "stack" */ 03083 ppar->arg_stack_end--; 03084 ppar->cur_part_fields-= ppar->is_part_keypart[partno]; 03085 ppar->cur_subpart_fields-= ppar->is_subpart_keypart[partno]; 03086 } 03087 03088 if (res == -1) 03089 return -1; 03090 go_right: 03091 if (key_tree->right != &null_element) 03092 { 03093 if (-1 == (right_res= find_used_partitions(ppar,key_tree->right))) 03094 return -1; 03095 } 03096 return (left_res || right_res || res); 03097 } 03098 03099 03100 static void mark_all_partitions_as_used(partition_info *part_info) 03101 { 03102 bitmap_set_all(&part_info->used_partitions); 03103 } 03104 03105 03106 /* 03107 Check if field types allow to construct partitioning index description 03108 03109 SYNOPSIS 03110 fields_ok_for_partition_index() 03111 pfield NULL-terminated array of pointers to fields. 03112 03113 DESCRIPTION 03114 For an array of fields, check if we can use all of the fields to create 03115 partitioning index description. 03116 03117 We can't process GEOMETRY fields - for these fields singlepoint intervals 03118 cant be generated, and non-singlepoint are "special" kinds of intervals 03119 to which our processing logic can't be applied. 03120 03121 It is not known if we could process ENUM fields, so they are disabled to be 03122 on the safe side. 
03123 03124 RETURN 03125 TRUE Yes, fields can be used in partitioning index 03126 FALSE Otherwise 03127 */ 03128 03129 static bool fields_ok_for_partition_index(Field **pfield) 03130 { 03131 if (!pfield) 03132 return FALSE; 03133 for (; (*pfield); pfield++) 03134 { 03135 enum_field_types ftype= (*pfield)->real_type(); 03136 if (ftype == FIELD_TYPE_ENUM || ftype == FIELD_TYPE_GEOMETRY) 03137 return FALSE; 03138 } 03139 return TRUE; 03140 } 03141 03142 03143 /* 03144 Create partition index description and fill related info in the context 03145 struct 03146 03147 SYNOPSIS 03148 create_partition_index_description() 03149 prune_par INOUT Partition pruning context 03150 03151 DESCRIPTION 03152 Create partition index description. Partition index description is: 03153 03154 part_index(used_fields_list(part_expr), used_fields_list(subpart_expr)) 03155 03156 If partitioning/sub-partitioning uses BLOB or Geometry fields, then 03157 corresponding fields_list(...) is not included into index description 03158 and we don't perform partition pruning for partitions/subpartitions. 03159 03160 RETURN 03161 TRUE Out of memory or can't do partition pruning at all 03162 FALSE OK 03163 */ 03164 03165 static bool create_partition_index_description(PART_PRUNE_PARAM *ppar) 03166 { 03167 RANGE_OPT_PARAM *range_par= &(ppar->range_param); 03168 partition_info *part_info= ppar->part_info; 03169 uint used_part_fields, used_subpart_fields; 03170 03171 used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ? 03172 part_info->no_part_fields : 0; 03173 used_subpart_fields= 03174 fields_ok_for_partition_index(part_info->subpart_field_array)? 
03175 part_info->no_subpart_fields : 0; 03176 03177 uint total_parts= used_part_fields + used_subpart_fields; 03178 03179 ppar->part_fields= used_part_fields; 03180 ppar->last_part_partno= (int)used_part_fields - 1; 03181 03182 ppar->subpart_fields= used_subpart_fields; 03183 ppar->last_subpart_partno= 03184 used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1; 03185 03186 if (part_info->is_sub_partitioned()) 03187 { 03188 ppar->mark_full_partition_used= mark_full_partition_used_with_parts; 03189 ppar->get_top_partition_id_func= part_info->get_part_partition_id; 03190 } 03191 else 03192 { 03193 ppar->mark_full_partition_used= mark_full_partition_used_no_parts; 03194 ppar->get_top_partition_id_func= part_info->get_partition_id; 03195 } 03196 03197 KEY_PART *key_part; 03198 MEM_ROOT *alloc= range_par->mem_root; 03199 if (!total_parts || 03200 !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* 03201 total_parts)) || 03202 !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)* 03203 total_parts)) || 03204 !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* 03205 total_parts)) || 03206 !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* 03207 total_parts))) 03208 return TRUE; 03209 03210 if (ppar->subpart_fields) 03211 { 03212 my_bitmap_map *buf; 03213 uint32 bufsize= bitmap_buffer_size(ppar->part_info->no_subparts); 03214 if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize))) 03215 return TRUE; 03216 bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->no_subparts, 03217 FALSE); 03218 } 03219 range_par->key_parts= key_part; 03220 Field **field= (ppar->part_fields)? 
part_info->part_field_array : 03221 part_info->subpart_field_array; 03222 bool in_subpart_fields= FALSE; 03223 for (uint part= 0; part < total_parts; part++, key_part++) 03224 { 03225 key_part->key= 0; 03226 key_part->part= part; 03227 key_part->length= (*field)->pack_length_in_rec(); 03228 /* 03229 psergey-todo: check yet again if this is correct for tricky field types, 03230 e.g. see "Fix a fatal error in decimal key handling" in open_binary_frm() 03231 */ 03232 key_part->store_length= (*field)->pack_length(); 03233 if ((*field)->real_maybe_null()) 03234 key_part->store_length+= HA_KEY_NULL_LENGTH; 03235 if ((*field)->type() == FIELD_TYPE_BLOB || 03236 (*field)->real_type() == MYSQL_TYPE_VARCHAR) 03237 key_part->store_length+= HA_KEY_BLOB_LENGTH; 03238 03239 key_part->field= (*field); 03240 key_part->image_type = Field::itRAW; 03241 /* We don't set key_parts->null_bit as it will not be used */ 03242 03243 ppar->is_part_keypart[part]= !in_subpart_fields; 03244 ppar->is_subpart_keypart[part]= in_subpart_fields; 03245 03246 /* 03247 Check if this was last field in this array, in this case we 03248 switch to subpartitioning fields. (This will only happens if 03249 there are subpartitioning fields to cater for). 
03250 */ 03251 if (!*(++field)) 03252 { 03253 field= part_info->subpart_field_array; 03254 in_subpart_fields= TRUE; 03255 } 03256 } 03257 range_par->key_parts_end= key_part; 03258 03259 DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts, 03260 range_par->key_parts_end);); 03261 return FALSE; 03262 } 03263 03264 03265 #ifndef DBUG_OFF 03266 03267 static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end) 03268 { 03269 DBUG_ENTER("print_partitioning_index"); 03270 DBUG_LOCK_FILE; 03271 fprintf(DBUG_FILE, "partitioning INDEX("); 03272 for (KEY_PART *p=parts; p != parts_end; p++) 03273 { 03274 fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name); 03275 } 03276 fputs(");\n", DBUG_FILE); 03277 DBUG_UNLOCK_FILE; 03278 DBUG_VOID_RETURN; 03279 } 03280 03281 /* Print field value into debug trace, in NULL-aware way. */ 03282 static void dbug_print_field(Field *field) 03283 { 03284 if (field->is_real_null()) 03285 fprintf(DBUG_FILE, "NULL"); 03286 else 03287 { 03288 char buf[256]; 03289 String str(buf, sizeof(buf), &my_charset_bin); 03290 str.length(0); 03291 String *pstr; 03292 pstr= field->val_str(&str); 03293 fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe()); 03294 } 03295 } 03296 03297 03298 /* Print a "c1 < keypartX < c2" - type interval into debug trace. 
*/ 03299 static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) 03300 { 03301 DBUG_ENTER("dbug_print_segment_range"); 03302 DBUG_LOCK_FILE; 03303 if (!(arg->min_flag & NO_MIN_RANGE)) 03304 { 03305 store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length); 03306 dbug_print_field(part->field); 03307 if (arg->min_flag & NEAR_MIN) 03308 fputs(" < ", DBUG_FILE); 03309 else 03310 fputs(" <= ", DBUG_FILE); 03311 } 03312 03313 fprintf(DBUG_FILE, "%s", part->field->field_name); 03314 03315 if (!(arg->max_flag & NO_MAX_RANGE)) 03316 { 03317 if (arg->max_flag & NEAR_MAX) 03318 fputs(" < ", DBUG_FILE); 03319 else 03320 fputs(" <= ", DBUG_FILE); 03321 store_key_image_to_rec(part->field, (char*)(arg->max_value), part->length); 03322 dbug_print_field(part->field); 03323 } 03324 fputs("\n", DBUG_FILE); 03325 DBUG_UNLOCK_FILE; 03326 DBUG_VOID_RETURN; 03327 } 03328 03329 03330 /* 03331 Print a singlepoint multi-keypart range interval to debug trace 03332 03333 SYNOPSIS 03334 dbug_print_singlepoint_range() 03335 start Array of SEL_ARG* ptrs representing conditions on key parts 03336 num Number of elements in the array. 03337 03338 DESCRIPTION 03339 This function prints a "keypartN=constN AND ... AND keypartK=constK"-type 03340 interval to debug trace. 
03341 */ 03342 03343 static void dbug_print_singlepoint_range(SEL_ARG **start, uint num) 03344 { 03345 DBUG_ENTER("dbug_print_singlepoint_range"); 03346 DBUG_LOCK_FILE; 03347 SEL_ARG **end= start + num; 03348 03349 for (SEL_ARG **arg= start; arg != end; arg++) 03350 { 03351 Field *field= (*arg)->field; 03352 fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name); 03353 dbug_print_field(field); 03354 } 03355 fputs("\n", DBUG_FILE); 03356 DBUG_UNLOCK_FILE; 03357 DBUG_VOID_RETURN; 03358 } 03359 #endif 03360 03361 /**************************************************************************** 03362 * Partition pruning code ends 03363 ****************************************************************************/ 03364 #endif 03365 03366 03367 /* 03368 Get cost of 'sweep' full records retrieval. 03369 SYNOPSIS 03370 get_sweep_read_cost() 03371 param Parameter from test_quick_select 03372 records # of records to be retrieved 03373 RETURN 03374 cost of sweep 03375 */ 03376 03377 double get_sweep_read_cost(const PARAM *param, ha_rows records) 03378 { 03379 double result; 03380 DBUG_ENTER("get_sweep_read_cost"); 03381 if (param->table->file->primary_key_is_clustered()) 03382 { 03383 result= param->table->file->read_time(param->table->s->primary_key, 03384 records, records); 03385 } 03386 else 03387 { 03388 double n_blocks= 03389 ceil(ulonglong2double(param->table->file->stats.data_file_length) / 03390 IO_SIZE); 03391 double busy_blocks= 03392 n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); 03393 if (busy_blocks < 1.0) 03394 busy_blocks= 1.0; 03395 DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks, 03396 busy_blocks)); 03397 /* 03398 Disabled: Bail out if # of blocks to read is bigger than # of blocks in 03399 table data file. 
03400 if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks) 03401 return 1; 03402 */ 03403 JOIN *join= param->thd->lex->select_lex.join; 03404 if (!join || join->tables == 1) 03405 { 03406 /* No join, assume reading is done in one 'sweep' */ 03407 result= busy_blocks*(DISK_SEEK_BASE_COST + 03408 DISK_SEEK_PROP_COST*n_blocks/busy_blocks); 03409 } 03410 else 03411 { 03412 /* 03413 Possibly this is a join with source table being non-last table, so 03414 assume that disk seeks are random here. 03415 */ 03416 result= busy_blocks; 03417 } 03418 } 03419 DBUG_PRINT("info",("returning cost=%g", result)); 03420 DBUG_RETURN(result); 03421 } 03422 03423 03424 /* 03425 Get best plan for a SEL_IMERGE disjunctive expression. 03426 SYNOPSIS 03427 get_best_disjunct_quick() 03428 param Parameter from check_quick_select function 03429 imerge Expression to use 03430 read_time Don't create scans with cost > read_time 03431 03432 NOTES 03433 index_merge cost is calculated as follows: 03434 index_merge_cost = 03435 cost(index_reads) + (see #1) 03436 cost(rowid_to_row_scan) + (see #2) 03437 cost(unique_use) (see #3) 03438 03439 1. cost(index_reads) =SUM_i(cost(index_read_i)) 03440 For non-CPK scans, 03441 cost(index_read_i) = {cost of ordinary 'index only' scan} 03442 For CPK scan, 03443 cost(index_read_i) = {cost of non-'index only' scan} 03444 03445 2. cost(rowid_to_row_scan) 03446 If table PK is clustered then 03447 cost(rowid_to_row_scan) = 03448 {cost of ordinary clustered PK scan with n_ranges=n_rows} 03449 03450 Otherwise, we use the following model to calculate costs: 03451 We need to retrieve n_rows rows from file that occupies n_blocks blocks. 03452 We assume that offsets of rows we need are independent variates with 03453 uniform distribution in [0..max_file_offset] range. 03454 03455 We'll denote block as "busy" if it contains row(s) we need to retrieve 03456 and "empty" if doesn't contain rows we need. 
03457 03458 Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this 03459 applies to any block in file). Let x_i be a variate taking value 1 if 03460 block #i is empty and 0 otherwise. 03461 03462 Then E(x_i) = (1 - 1/n_blocks)^n_rows; 03463 03464 E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) = 03465 = n_blocks * ((1 - 1/n_blocks)^n_rows) = 03466 ~= n_blocks * exp(-n_rows/n_blocks). 03467 03468 E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) = 03469 ~= n_blocks * (1 - exp(-n_rows/n_blocks)). 03470 03471 Average size of "hole" between neighbor non-empty blocks is 03472 E(hole_size) = n_blocks/E(n_busy_blocks). 03473 03474 The total cost of reading all needed blocks in one "sweep" is: 03475 03476 E(n_busy_blocks)* 03477 (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)). 03478 03479 3. Cost of Unique use is calculated in Unique::get_use_cost function. 03480 03481 ROR-union cost is calculated in the same way index_merge, but instead of 03482 Unique a priority queue is used. 03483 03484 RETURN 03485 Created read plan 03486 NULL - Out of memory or no read scan could be built. 
03487 */ 03488 03489 static 03490 TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, 03491 double read_time) 03492 { 03493 SEL_TREE **ptree; 03494 TRP_INDEX_MERGE *imerge_trp= NULL; 03495 uint n_child_scans= imerge->trees_next - imerge->trees; 03496 TRP_RANGE **range_scans; 03497 TRP_RANGE **cur_child; 03498 TRP_RANGE **cpk_scan= NULL; 03499 bool imerge_too_expensive= FALSE; 03500 double imerge_cost= 0.0; 03501 ha_rows cpk_scan_records= 0; 03502 ha_rows non_cpk_scan_records= 0; 03503 bool pk_is_clustered= param->table->file->primary_key_is_clustered(); 03504 bool all_scans_ror_able= TRUE; 03505 bool all_scans_rors= TRUE; 03506 uint unique_calc_buff_size; 03507 TABLE_READ_PLAN **roru_read_plans; 03508 TABLE_READ_PLAN **cur_roru_plan; 03509 double roru_index_costs; 03510 ha_rows roru_total_records; 03511 double roru_intersect_part= 1.0; 03512 DBUG_ENTER("get_best_disjunct_quick"); 03513 DBUG_PRINT("info", ("Full table scan cost =%g", read_time)); 03514 03515 if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, 03516 sizeof(TRP_RANGE*)* 03517 n_child_scans))) 03518 DBUG_RETURN(NULL); 03519 /* 03520 Collect best 'range' scan for each of disjuncts, and, while doing so, 03521 analyze possibility of ROR scans. Also calculate some values needed by 03522 other parts of the code. 03523 */ 03524 for (ptree= imerge->trees, cur_child= range_scans; 03525 ptree != imerge->trees_next; 03526 ptree++, cur_child++) 03527 { 03528 DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map, 03529 "tree in SEL_IMERGE");); 03530 if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, read_time))) 03531 { 03532 /* 03533 One of index scans in this index_merge is more expensive than entire 03534 table read for another available option. The entire index_merge (and 03535 any possible ROR-union) will be more expensive then, too. We continue 03536 here only to update SQL_SELECT members. 
03537 */ 03538 imerge_too_expensive= TRUE; 03539 } 03540 if (imerge_too_expensive) 03541 continue; 03542 03543 imerge_cost += (*cur_child)->read_cost; 03544 all_scans_ror_able &= ((*ptree)->n_ror_scans > 0); 03545 all_scans_rors &= (*cur_child)->is_ror; 03546 if (pk_is_clustered && 03547 param->real_keynr[(*cur_child)->key_idx] == 03548 param->table->s->primary_key) 03549 { 03550 cpk_scan= cur_child; 03551 cpk_scan_records= (*cur_child)->records; 03552 } 03553 else 03554 non_cpk_scan_records += (*cur_child)->records; 03555 } 03556 03557 DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost)); 03558 if (imerge_too_expensive || (imerge_cost > read_time) || 03559 (non_cpk_scan_records+cpk_scan_records >= param->table->file->stats.records) && 03560 read_time != DBL_MAX) 03561 { 03562 /* 03563 Bail out if it is obvious that both index_merge and ROR-union will be 03564 more expensive 03565 */ 03566 DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than " 03567 "full table scan, bailing out")); 03568 DBUG_RETURN(NULL); 03569 } 03570 if (all_scans_rors) 03571 { 03572 roru_read_plans= (TABLE_READ_PLAN**)range_scans; 03573 goto skip_to_ror_scan; 03574 } 03575 if (cpk_scan) 03576 { 03577 /* 03578 Add one ROWID comparison for each row retrieved on non-CPK scan. 
(it 03579 is done in QUICK_RANGE_SELECT::row_in_ranges) 03580 */ 03581 imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID; 03582 } 03583 03584 /* Calculate cost(rowid_to_row_scan) */ 03585 imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records); 03586 DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", 03587 imerge_cost)); 03588 if (imerge_cost > read_time) 03589 goto build_ror_index_merge; 03590 03591 /* Add Unique operations cost */ 03592 unique_calc_buff_size= 03593 Unique::get_cost_calc_buff_size(non_cpk_scan_records, 03594 param->table->file->ref_length, 03595 param->thd->variables.sortbuff_size); 03596 if (param->imerge_cost_buff_size < unique_calc_buff_size) 03597 { 03598 if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root, 03599 unique_calc_buff_size))) 03600 DBUG_RETURN(NULL); 03601 param->imerge_cost_buff_size= unique_calc_buff_size; 03602 } 03603 03604 imerge_cost += 03605 Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records, 03606 param->table->file->ref_length, 03607 param->thd->variables.sortbuff_size); 03608 DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)", 03609 imerge_cost, read_time)); 03610 if (imerge_cost < read_time) 03611 { 03612 if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE)) 03613 { 03614 imerge_trp->read_cost= imerge_cost; 03615 imerge_trp->records= non_cpk_scan_records + cpk_scan_records; 03616 imerge_trp->records= min(imerge_trp->records, 03617 param->table->file->stats.records); 03618 imerge_trp->range_scans= range_scans; 03619 imerge_trp->range_scans_end= range_scans + n_child_scans; 03620 read_time= imerge_cost; 03621 } 03622 } 03623 03624 build_ror_index_merge: 03625 if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE) 03626 DBUG_RETURN(imerge_trp); 03627 03628 /* Ok, it is possible to build a ROR-union, try it. 
*/ 03629 bool dummy; 03630 if (!(roru_read_plans= 03631 (TABLE_READ_PLAN**)alloc_root(param->mem_root, 03632 sizeof(TABLE_READ_PLAN*)* 03633 n_child_scans))) 03634 DBUG_RETURN(imerge_trp); 03635 skip_to_ror_scan: 03636 roru_index_costs= 0.0; 03637 roru_total_records= 0; 03638 cur_roru_plan= roru_read_plans; 03639 03640 /* Find 'best' ROR scan for each of trees in disjunction */ 03641 for (ptree= imerge->trees, cur_child= range_scans; 03642 ptree != imerge->trees_next; 03643 ptree++, cur_child++, cur_roru_plan++) 03644 { 03645 /* 03646 Assume the best ROR scan is the one that has cheapest full-row-retrieval 03647 scan cost. 03648 Also accumulate index_only scan costs as we'll need them to calculate 03649 overall index_intersection cost. 03650 */ 03651 double cost; 03652 if ((*cur_child)->is_ror) 03653 { 03654 /* Ok, we have index_only cost, now get full rows scan cost */ 03655 cost= param->table->file-> 03656 read_time(param->real_keynr[(*cur_child)->key_idx], 1, 03657 (*cur_child)->records) + 03658 rows2double((*cur_child)->records) / TIME_FOR_COMPARE; 03659 } 03660 else 03661 cost= read_time; 03662 03663 TABLE_READ_PLAN *prev_plan= *cur_child; 03664 if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost, 03665 &dummy))) 03666 { 03667 if (prev_plan->is_ror) 03668 *cur_roru_plan= prev_plan; 03669 else 03670 DBUG_RETURN(imerge_trp); 03671 roru_index_costs += (*cur_roru_plan)->read_cost; 03672 } 03673 else 03674 roru_index_costs += 03675 ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs; 03676 roru_total_records += (*cur_roru_plan)->records; 03677 roru_intersect_part *= (*cur_roru_plan)->records / 03678 param->table->file->stats.records; 03679 } 03680 03681 /* 03682 rows to retrieve= 03683 SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows). 03684 This is valid because index_merge construction guarantees that conditions 03685 in disjunction do not share key parts. 
03686 */ 03687 roru_total_records -= (ha_rows)(roru_intersect_part* 03688 param->table->file->stats.records); 03689 /* ok, got a ROR read plan for each of the disjuncts 03690 Calculate cost: 03691 cost(index_union_scan(scan_1, ... scan_n)) = 03692 SUM_i(cost_of_index_only_scan(scan_i)) + 03693 queue_use_cost(rowid_len, n) + 03694 cost_of_row_retrieval 03695 See get_merge_buffers_cost function for queue_use_cost formula derivation. 03696 */ 03697 03698 double roru_total_cost; 03699 roru_total_cost= roru_index_costs + 03700 rows2double(roru_total_records)*log((double)n_child_scans) / 03701 (TIME_FOR_COMPARE_ROWID * M_LN2) + 03702 get_sweep_read_cost(param, roru_total_records); 03703 03704 DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost, 03705 n_child_scans)); 03706 TRP_ROR_UNION* roru; 03707 if (roru_total_cost < read_time) 03708 { 03709 if ((roru= new (param->mem_root) TRP_ROR_UNION)) 03710 { 03711 roru->first_ror= roru_read_plans; 03712 roru->last_ror= roru_read_plans + n_child_scans; 03713 roru->read_cost= roru_total_cost; 03714 roru->records= roru_total_records; 03715 DBUG_RETURN(roru); 03716 } 03717 } 03718 DBUG_RETURN(imerge_trp); 03719 } 03720 03721 03722 /* 03723 Calculate cost of 'index only' scan for given index and number of records. 03724 03725 SYNOPSIS 03726 get_index_only_read_time() 03727 param parameters structure 03728 records #of records to read 03729 keynr key to read 03730 03731 NOTES 03732 It is assumed that we will read trough the whole key range and that all 03733 key blocks are half full (normally things are much better). It is also 03734 assumed that each time we read the next key from the index, the handler 03735 performs a random seek, thus the cost is proportional to the number of 03736 blocks read. 03737 03738 TODO: 03739 Move this to handler->read_time() by adding a flag 'index-only-read' to 03740 this call. 
The reason for doing this is that the current function doesn't 03741 handle the case when the row is stored in the b-tree (like in innodb 03742 clustered index) 03743 */ 03744 03745 static double get_index_only_read_time(const PARAM* param, ha_rows records, 03746 int keynr) 03747 { 03748 double read_time; 03749 uint keys_per_block= (param->table->file->stats.block_size/2/ 03750 (param->table->key_info[keynr].key_length+ 03751 param->table->file->ref_length) + 1); 03752 read_time=((double) (records+keys_per_block-1)/ 03753 (double) keys_per_block); 03754 return read_time; 03755 } 03756 03757 03758 typedef struct st_ror_scan_info 03759 { 03760 uint idx; /* # of used key in param->keys */ 03761 uint keynr; /* # of used key in table */ 03762 ha_rows records; /* estimate of # records this scan will return */ 03763 03764 /* Set of intervals over key fields that will be used for row retrieval. */ 03765 SEL_ARG *sel_arg; 03766 03767 /* Fields used in the query and covered by this ROR scan. */ 03768 MY_BITMAP covered_fields; 03769 uint used_fields_covered; /* # of set bits in covered_fields */ 03770 int key_rec_length; /* length of key record (including rowid) */ 03771 03772 /* 03773 Cost of reading all index records with values in sel_arg intervals set 03774 (assuming there is no need to access full table records) 03775 */ 03776 double index_read_cost; 03777 uint first_uncovered_field; /* first unused bit in covered_fields */ 03778 uint key_components; /* # of parts in the key */ 03779 } ROR_SCAN_INFO; 03780 03781 03782 /* 03783 Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using 03784 sel_arg set of intervals. 
03785 03786 SYNOPSIS 03787 make_ror_scan() 03788 param Parameter from test_quick_select function 03789 idx Index of key in param->keys 03790 sel_arg Set of intervals for a given key 03791 03792 RETURN 03793 NULL - out of memory 03794 ROR scan structure containing a scan for {idx, sel_arg} 03795 */ 03796 03797 static 03798 ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) 03799 { 03800 ROR_SCAN_INFO *ror_scan; 03801 my_bitmap_map *bitmap_buf; 03802 uint keynr; 03803 DBUG_ENTER("make_ror_scan"); 03804 03805 if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root, 03806 sizeof(ROR_SCAN_INFO)))) 03807 DBUG_RETURN(NULL); 03808 03809 ror_scan->idx= idx; 03810 ror_scan->keynr= keynr= param->real_keynr[idx]; 03811 ror_scan->key_rec_length= (param->table->key_info[keynr].key_length + 03812 param->table->file->ref_length); 03813 ror_scan->sel_arg= sel_arg; 03814 ror_scan->records= param->table->quick_rows[keynr]; 03815 03816 if (!(bitmap_buf= (my_bitmap_map*) alloc_root(param->mem_root, 03817 param->fields_bitmap_size))) 03818 DBUG_RETURN(NULL); 03819 03820 if (bitmap_init(&ror_scan->covered_fields, bitmap_buf, 03821 param->table->s->fields, FALSE)) 03822 DBUG_RETURN(NULL); 03823 bitmap_clear_all(&ror_scan->covered_fields); 03824 03825 KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part; 03826 KEY_PART_INFO *key_part_end= key_part + 03827 param->table->key_info[keynr].key_parts; 03828 for (;key_part != key_part_end; ++key_part) 03829 { 03830 if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr-1)) 03831 bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1); 03832 } 03833 ror_scan->index_read_cost= 03834 get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr], 03835 ror_scan->keynr); 03836 DBUG_RETURN(ror_scan); 03837 } 03838 03839 03840 /* 03841 Compare two ROR_SCAN_INFO** by E(#records_matched) * key_record_length. 
03842 SYNOPSIS 03843 cmp_ror_scan_info() 03844 a ptr to first compared value 03845 b ptr to second compared value 03846 03847 RETURN 03848 -1 a < b 03849 0 a = b 03850 1 a > b 03851 */ 03852 03853 static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) 03854 { 03855 double val1= rows2double((*a)->records) * (*a)->key_rec_length; 03856 double val2= rows2double((*b)->records) * (*b)->key_rec_length; 03857 return (val1 < val2)? -1: (val1 == val2)? 0 : 1; 03858 } 03859 03860 /* 03861 Compare two ROR_SCAN_INFO** by 03862 (#covered fields in F desc, 03863 #components asc, 03864 number of first not covered component asc) 03865 03866 SYNOPSIS 03867 cmp_ror_scan_info_covering() 03868 a ptr to first compared value 03869 b ptr to second compared value 03870 03871 RETURN 03872 -1 a < b 03873 0 a = b 03874 1 a > b 03875 */ 03876 03877 static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) 03878 { 03879 if ((*a)->used_fields_covered > (*b)->used_fields_covered) 03880 return -1; 03881 if ((*a)->used_fields_covered < (*b)->used_fields_covered) 03882 return 1; 03883 if ((*a)->key_components < (*b)->key_components) 03884 return -1; 03885 if ((*a)->key_components > (*b)->key_components) 03886 return 1; 03887 if ((*a)->first_uncovered_field < (*b)->first_uncovered_field) 03888 return -1; 03889 if ((*a)->first_uncovered_field > (*b)->first_uncovered_field) 03890 return 1; 03891 return 0; 03892 } 03893 03894 03895 /* Auxiliary structure for incremental ROR-intersection creation */ 03896 typedef struct 03897 { 03898 const PARAM *param; 03899 MY_BITMAP covered_fields; /* union of fields covered by all scans */ 03900 /* 03901 Fraction of table records that satisfies conditions of all scans. 03902 This is the number of full records that will be retrieved if a 03903 non-index_only index intersection will be employed. 
03904 */ 03905 double out_rows; 03906 /* TRUE if covered_fields is a superset of needed_fields */ 03907 bool is_covering; 03908 03909 ha_rows index_records; /* sum(#records to look in indexes) */ 03910 double index_scan_costs; /* SUM(cost of 'index-only' scans) */ 03911 double total_cost; 03912 } ROR_INTERSECT_INFO; 03913 03914 03915 /* 03916 Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans. 03917 03918 SYNOPSIS 03919 ror_intersect_init() 03920 param Parameter from test_quick_select 03921 03922 RETURN 03923 allocated structure 03924 NULL on error 03925 */ 03926 03927 static 03928 ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param) 03929 { 03930 ROR_INTERSECT_INFO *info; 03931 my_bitmap_map* buf; 03932 if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, 03933 sizeof(ROR_INTERSECT_INFO)))) 03934 return NULL; 03935 info->param= param; 03936 if (!(buf= (my_bitmap_map*) alloc_root(param->mem_root, 03937 param->fields_bitmap_size))) 03938 return NULL; 03939 if (bitmap_init(&info->covered_fields, buf, param->table->s->fields, 03940 FALSE)) 03941 return NULL; 03942 info->is_covering= FALSE; 03943 info->index_scan_costs= 0.0; 03944 info->index_records= 0; 03945 info->out_rows= param->table->file->stats.records; 03946 bitmap_clear_all(&info->covered_fields); 03947 return info; 03948 } 03949 03950 void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src) 03951 { 03952 dst->param= src->param; 03953 memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap, 03954 no_bytes_in_map(&src->covered_fields)); 03955 dst->out_rows= src->out_rows; 03956 dst->is_covering= src->is_covering; 03957 dst->index_records= src->index_records; 03958 dst->index_scan_costs= src->index_scan_costs; 03959 dst->total_cost= src->total_cost; 03960 } 03961 03962 03963 /* 03964 Get selectivity of a ROR scan wrt ROR-intersection. 

  SYNOPSIS
    ror_scan_selectivity()
      info  ROR-intersection
      scan  ROR scan

  NOTES
    Suppose we have a condition on several keys
    cond=k_11=c_11 AND k_12=c_12 AND ...  // parts of first key
         k_21=c_21 AND k_22=c_22 AND ...  // parts of second key
          ...
         k_n1=c_n1 AND k_n3=c_n3 AND ...  (1) // parts of the key used by *scan

    where k_ij may be the same as any k_pq (i.e. keys may have common parts).

    A full row is retrieved if the entire condition holds.

    The recursive procedure for finding P(cond) is as follows:

    First step:
    Pick the 1st part of the 1st key and break conjunction (1) into two parts:
      cond= (k_11=c_11 AND R)

    Here R may still contain condition(s) equivalent to k_11=c_11.
    Nevertheless, the following holds:

      P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).

    Mark k_11 as fixed field (and satisfied condition) F, save P(F),
    save R to be cond and proceed to the recursion step.

    Recursion step:
    We have a set of fixed fields (satisfied conditions) F, probability P(F),
    and the remaining conjunction R.
    Pick the next key part on the current key and its condition "k_ij=c_ij".
    We will add "k_ij=c_ij" into F and update P(F).
    Let's denote k_ij as t, R = t AND R1, where R1 may still contain t. Then

     P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)

    (where '|' means conditional probability, not "or")

    Consider the first multiplier in (2). One of the following holds:
    a) F contains a condition on the field used in t (i.e. t AND F = F).
      Then P(t|F) = 1

    b) F doesn't contain a condition on the field used in t. Then F and t are
     considered independent.

     P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
            = P(t|fields_before_t_in_key).

     P(t|fields_before_t_in_key) = #records(fields_before_t_in_key, t) /
                                   #records(fields_before_t_in_key)

    The second multiplier is calculated by applying this step recursively.

  IMPLEMENTATION
    This function calculates the result of application of the "recursion step"
    described above for all fixed key members of a single key, accumulating set
    of covered fields, selectivity, etc.

    The calculation is conducted as follows:
    Let's denote #records(keypart1, ... keypartK) as n_k. We need to calculate

      n_{k1}      n_{k2}
     --------- * --------- * ....                                    (3)
      n_{k1-1}    n_{k2-1}

    where k1,k2,... are key parts whose fields were not yet marked as fixed
    (this is the result of applying option b) of the recursion step to the
    parts of a single key).
    Since it is reasonable to expect that most of the fields are not marked
    as fixed, we calculate (3) as

                                   n_{i1}      n_{i2}
    (3) = n_{max_key_part}  / (   --------- * --------- * ....  )
                                   n_{i1-1}    n_{i2-1}

    where i1,i2, .. are key parts that were already marked as fixed.

    In order to minimize the number of expensive records_in_range calls we
    group and reduce adjacent fractions.

  RETURN
    Selectivity of given ROR scan.
04051 04052 */ 04053 04054 static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info, 04055 const ROR_SCAN_INFO *scan) 04056 { 04057 double selectivity_mult= 1.0; 04058 KEY_PART_INFO *key_part= info->param->table->key_info[scan->keynr].key_part; 04059 byte key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */ 04060 char *key_ptr= (char*) key_val; 04061 SEL_ARG *sel_arg, *tuple_arg= NULL; 04062 bool cur_covered; 04063 bool prev_covered= test(bitmap_is_set(&info->covered_fields, 04064 key_part->fieldnr-1)); 04065 key_range min_range; 04066 key_range max_range; 04067 min_range.key= (byte*) key_val; 04068 min_range.flag= HA_READ_KEY_EXACT; 04069 max_range.key= (byte*) key_val; 04070 max_range.flag= HA_READ_AFTER_KEY; 04071 ha_rows prev_records= info->param->table->file->stats.records; 04072 DBUG_ENTER("ror_intersect_selectivity"); 04073 04074 for (sel_arg= scan->sel_arg; sel_arg; 04075 sel_arg= sel_arg->next_key_part) 04076 { 04077 DBUG_PRINT("info",("sel_arg step")); 04078 cur_covered= test(bitmap_is_set(&info->covered_fields, 04079 key_part[sel_arg->part].fieldnr-1)); 04080 if (cur_covered != prev_covered) 04081 { 04082 /* create (part1val, ..., part{n-1}val) tuple. 
*/ 04083 ha_rows records; 04084 if (!tuple_arg) 04085 { 04086 tuple_arg= scan->sel_arg; 04087 /* Here we use the length of the first key part */ 04088 tuple_arg->store_min(key_part->store_length, &key_ptr, 0); 04089 } 04090 while (tuple_arg->next_key_part != sel_arg) 04091 { 04092 tuple_arg= tuple_arg->next_key_part; 04093 tuple_arg->store_min(key_part[tuple_arg->part].store_length, &key_ptr, 0); 04094 } 04095 min_range.length= max_range.length= ((char*) key_ptr - (char*) key_val); 04096 records= (info->param->table->file-> 04097 records_in_range(scan->keynr, &min_range, &max_range)); 04098 if (cur_covered) 04099 { 04100 /* uncovered -> covered */ 04101 double tmp= rows2double(records)/rows2double(prev_records); 04102 DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); 04103 selectivity_mult *= tmp; 04104 prev_records= HA_POS_ERROR; 04105 } 04106 else 04107 { 04108 /* covered -> uncovered */ 04109 prev_records= records; 04110 } 04111 } 04112 prev_covered= cur_covered; 04113 } 04114 if (!prev_covered) 04115 { 04116 double tmp= rows2double(info->param->table->quick_rows[scan->keynr]) / 04117 rows2double(prev_records); 04118 DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); 04119 selectivity_mult *= tmp; 04120 } 04121 DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult)); 04122 DBUG_RETURN(selectivity_mult); 04123 } 04124 04125 04126 /* 04127 Check if adding a ROR scan to a ROR-intersection reduces its cost of 04128 ROR-intersection and if yes, update parameters of ROR-intersection, 04129 including its cost. 04130 04131 SYNOPSIS 04132 ror_intersect_add() 04133 param Parameter from test_quick_select 04134 info ROR-intersection structure to add the scan to. 04135 ror_scan ROR scan info to add. 
04136 is_cpk_scan If TRUE, add the scan as CPK scan (this can be inferred 04137 from other parameters and is passed separately only to 04138 avoid duplicating the inference code) 04139 04140 NOTES 04141 Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR- 04142 intersection decreases. The cost of ROR-intersection is calculated as 04143 follows: 04144 04145 cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval 04146 04147 When we add a scan the first increases and the second decreases. 04148 04149 cost_of_full_rows_retrieval= 04150 (union of indexes used covers all needed fields) ? 04151 cost_of_sweep_read(E(rows_to_retrieve), rows_in_table) : 04152 0 04153 04154 E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) * 04155 ror_scan_selectivity({scan1}, scan2) * ... * 04156 ror_scan_selectivity({scan1,...}, scanN). 04157 RETURN 04158 TRUE ROR scan added to ROR-intersection, cost updated. 04159 FALSE It doesn't make sense to add this ROR scan to this ROR-intersection. 04160 */ 04161 04162 static bool ror_intersect_add(ROR_INTERSECT_INFO *info, 04163 ROR_SCAN_INFO* ror_scan, bool is_cpk_scan) 04164 { 04165 double selectivity_mult= 1.0; 04166 04167 DBUG_ENTER("ror_intersect_add"); 04168 DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows)); 04169 DBUG_PRINT("info", ("Adding scan on %s", 04170 info->param->table->key_info[ror_scan->keynr].name)); 04171 DBUG_PRINT("info", ("is_cpk_scan=%d",is_cpk_scan)); 04172 04173 selectivity_mult = ror_scan_selectivity(info, ror_scan); 04174 if (selectivity_mult == 1.0) 04175 { 04176 /* Don't add this scan if it doesn't improve selectivity. */ 04177 DBUG_PRINT("info", ("The scan doesn't improve selectivity.")); 04178 DBUG_RETURN(FALSE); 04179 } 04180 04181 info->out_rows *= selectivity_mult; 04182 DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost)); 04183 04184 if (is_cpk_scan) 04185 { 04186 /* 04187 CPK scan is used to filter out rows. 
We apply filtering for 04188 each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID 04189 per check this gives us: 04190 */ 04191 info->index_scan_costs += rows2double(info->index_records) / 04192 TIME_FOR_COMPARE_ROWID; 04193 } 04194 else 04195 { 04196 info->index_records += info->param->table->quick_rows[ror_scan->keynr]; 04197 info->index_scan_costs += ror_scan->index_read_cost; 04198 bitmap_union(&info->covered_fields, &ror_scan->covered_fields); 04199 if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields, 04200 &info->covered_fields)) 04201 { 04202 DBUG_PRINT("info", ("ROR-intersect is covering now")); 04203 info->is_covering= TRUE; 04204 } 04205 } 04206 04207 info->total_cost= info->index_scan_costs; 04208 DBUG_PRINT("info", ("info->total_cost: %g", info->total_cost)); 04209 if (!info->is_covering) 04210 { 04211 info->total_cost += 04212 get_sweep_read_cost(info->param, double2rows(info->out_rows)); 04213 DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost)); 04214 } 04215 DBUG_PRINT("info", ("New out_rows: %g", info->out_rows)); 04216 DBUG_PRINT("info", ("New cost: %g, %scovering", info->total_cost, 04217 info->is_covering?"" : "non-")); 04218 DBUG_RETURN(TRUE); 04219 } 04220 04221 04222 /* 04223 Get best ROR-intersection plan using non-covering ROR-intersection search 04224 algorithm. The returned plan may be covering. 04225 04226 SYNOPSIS 04227 get_best_ror_intersect() 04228 param Parameter from test_quick_select function. 04229 tree Transformed restriction condition to be used to look 04230 for ROR scans. 04231 read_time Do not return read plans with cost > read_time. 04232 are_all_covering [out] set to TRUE if union of all scans covers all 04233 fields needed by the query (and it is possible to build 04234 a covering ROR-intersection) 04235 04236 NOTES 04237 get_key_scans_params must be called before this function can be called. 

    When this function is called by ROR-union construction algorithm it
    assumes it is building an uncovered ROR-intersection (and thus # of full
    records to be retrieved is wrong here). This is a hack.

  IMPLEMENTATION
    The approximate best non-covering plan search algorithm is as follows:

    find_min_ror_intersection_scan()
    {
      R= select all ROR scans;
      order R by (E(#records_matched) * key_record_length).

      S= first(R); -- set of scans that will be used for ROR-intersection
      R= R - first(R);
      min_cost= cost(S);
      min_scan= make_scan(S);
      while (R is not empty)
      {
        firstR= first(R);
        R= R - firstR;
        if (!(selectivity(S + firstR) < selectivity(S)))
          continue;

        S= S + firstR;
        if (cost(S) < min_cost)
        {
          min_cost= cost(S);
          min_scan= make_scan(S);
        }
      }
      return min_scan;
    }

    See ror_intersect_add function for ROR intersection costs.

    Special handling for Clustered PK scans
    Clustered PK contains all table fields, so using it as a regular scan in
    index intersection doesn't make sense: a range scan on CPK will be less
    expensive in this case.
    Clustered PK scan has special handling in ROR-intersection: it is not used
    to retrieve rows, instead its condition is used to filter row references
    we get from scans on other keys.

  RETURN
    ROR-intersection table read plan
    NULL if out of memory or no suitable plan found.
04284 */ 04285 04286 static 04287 TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, 04288 double read_time, 04289 bool *are_all_covering) 04290 { 04291 uint idx; 04292 double min_cost= DBL_MAX; 04293 DBUG_ENTER("get_best_ror_intersect"); 04294 04295 if ((tree->n_ror_scans < 2) || !param->table->file->stats.records) 04296 DBUG_RETURN(NULL); 04297 04298 /* 04299 Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of 04300 them. Also find and save clustered PK scan if there is one. 04301 */ 04302 ROR_SCAN_INFO **cur_ror_scan; 04303 ROR_SCAN_INFO *cpk_scan= NULL; 04304 uint cpk_no; 04305 bool cpk_scan_used= FALSE; 04306 04307 if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, 04308 sizeof(ROR_SCAN_INFO*)* 04309 param->keys))) 04310 return NULL; 04311 cpk_no= ((param->table->file->primary_key_is_clustered()) ? 04312 param->table->s->primary_key : MAX_KEY); 04313 04314 for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++) 04315 { 04316 ROR_SCAN_INFO *scan; 04317 if (!tree->ror_scans_map.is_set(idx)) 04318 continue; 04319 if (!(scan= make_ror_scan(param, idx, tree->keys[idx]))) 04320 return NULL; 04321 if (param->real_keynr[idx] == cpk_no) 04322 { 04323 cpk_scan= scan; 04324 tree->n_ror_scans--; 04325 } 04326 else 04327 *(cur_ror_scan++)= scan; 04328 } 04329 04330 tree->ror_scans_end= cur_ror_scan; 04331 DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original", 04332 tree->ror_scans, 04333 tree->ror_scans_end);); 04334 /* 04335 Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized 04336 ROR_SCAN_INFO's. 04337 Step 2: Get best ROR-intersection using an approximate algorithm. 
04338 */ 04339 qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), 04340 (qsort_cmp)cmp_ror_scan_info); 04341 DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered", 04342 tree->ror_scans, 04343 tree->ror_scans_end);); 04344 04345 ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */ 04346 ROR_SCAN_INFO **intersect_scans_end; 04347 if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, 04348 sizeof(ROR_SCAN_INFO*)* 04349 tree->n_ror_scans))) 04350 return NULL; 04351 intersect_scans_end= intersect_scans; 04352 04353 /* Create and incrementally update ROR intersection. */ 04354 ROR_INTERSECT_INFO *intersect, *intersect_best; 04355 if (!(intersect= ror_intersect_init(param)) || 04356 !(intersect_best= ror_intersect_init(param))) 04357 return NULL; 04358 04359 /* [intersect_scans,intersect_scans_best) will hold the best intersection */ 04360 ROR_SCAN_INFO **intersect_scans_best; 04361 cur_ror_scan= tree->ror_scans; 04362 intersect_scans_best= intersect_scans; 04363 while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering) 04364 { 04365 /* S= S + first(R); R= R - first(R); */ 04366 if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE)) 04367 { 04368 cur_ror_scan++; 04369 continue; 04370 } 04371 04372 *(intersect_scans_end++)= *(cur_ror_scan++); 04373 04374 if (intersect->total_cost < min_cost) 04375 { 04376 /* Local minimum found, save it */ 04377 ror_intersect_cpy(intersect_best, intersect); 04378 intersect_scans_best= intersect_scans_end; 04379 min_cost = intersect->total_cost; 04380 } 04381 } 04382 04383 if (intersect_scans_best == intersect_scans) 04384 { 04385 DBUG_PRINT("info", ("None of scans increase selectivity")); 04386 DBUG_RETURN(NULL); 04387 } 04388 04389 DBUG_EXECUTE("info",print_ror_scans_arr(param->table, 04390 "best ROR-intersection", 04391 intersect_scans, 04392 intersect_scans_best);); 04393 04394 *are_all_covering= intersect->is_covering; 04395 uint best_num= 
intersect_scans_best - intersect_scans; 04396 ror_intersect_cpy(intersect, intersect_best); 04397 04398 /* 04399 Ok, found the best ROR-intersection of non-CPK key scans. 04400 Check if we should add a CPK scan. If the obtained ROR-intersection is 04401 covering, it doesn't make sense to add CPK scan. 04402 */ 04403 if (cpk_scan && !intersect->is_covering) 04404 { 04405 if (ror_intersect_add(intersect, cpk_scan, TRUE) && 04406 (intersect->total_cost < min_cost)) 04407 { 04408 cpk_scan_used= TRUE; 04409 intersect_best= intersect; //just set pointer here 04410 } 04411 } 04412 04413 /* Ok, return ROR-intersect plan if we have found one */ 04414 TRP_ROR_INTERSECT *trp= NULL; 04415 if (min_cost < read_time && (cpk_scan_used || best_num > 1)) 04416 { 04417 if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) 04418 DBUG_RETURN(trp); 04419 if (!(trp->first_scan= 04420 (ROR_SCAN_INFO**)alloc_root(param->mem_root, 04421 sizeof(ROR_SCAN_INFO*)*best_num))) 04422 DBUG_RETURN(NULL); 04423 memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*)); 04424 trp->last_scan= trp->first_scan + best_num; 04425 trp->is_covering= intersect_best->is_covering; 04426 trp->read_cost= intersect_best->total_cost; 04427 /* Prevent divisons by zero */ 04428 ha_rows best_rows = double2rows(intersect_best->out_rows); 04429 if (!best_rows) 04430 best_rows= 1; 04431 set_if_smaller(param->table->quick_condition_rows, best_rows); 04432 trp->records= best_rows; 04433 trp->index_scan_costs= intersect_best->index_scan_costs; 04434 trp->cpk_scan= cpk_scan_used? cpk_scan: NULL; 04435 DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:" 04436 "cost %g, records %lu", 04437 trp->read_cost, (ulong) trp->records)); 04438 } 04439 DBUG_RETURN(trp); 04440 } 04441 04442 04443 /* 04444 Get best covering ROR-intersection. 04445 SYNOPSIS 04446 get_best_covering_ror_intersect() 04447 param Parameter from test_quick_select function. 04448 tree SEL_TREE with sets of intervals for different keys. 
04449 read_time Don't return table read plans with cost > read_time. 04450 04451 RETURN 04452 Best covering ROR-intersection plan 04453 NULL if no plan found. 04454 04455 NOTES 04456 get_best_ror_intersect must be called for a tree before calling this 04457 function for it. 04458 This function invalidates tree->ror_scans member values. 04459 04460 The following approximate algorithm is used: 04461 I=set of all covering indexes 04462 F=set of all fields to cover 04463 S={} 04464 04465 do 04466 { 04467 Order I by (#covered fields in F desc, 04468 #components asc, 04469 number of first not covered component asc); 04470 F=F-covered by first(I); 04471 S=S+first(I); 04472 I=I-first(I); 04473 } while F is not empty. 04474 */ 04475 04476 static 04477 TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, 04478 SEL_TREE *tree, 04479 double read_time) 04480 { 04481 ROR_SCAN_INFO **ror_scan_mark; 04482 ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end; 04483 DBUG_ENTER("get_best_covering_ror_intersect"); 04484 04485 for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan) 04486 (*scan)->key_components= 04487 param->table->key_info[(*scan)->keynr].key_parts; 04488 04489 /* 04490 Run covering-ROR-search algorithm. 04491 Assume set I is [ror_scan .. 
ror_scans_end) 04492 */ 04493 04494 /*I=set of all covering indexes */ 04495 ror_scan_mark= tree->ror_scans; 04496 04497 my_bitmap_map int_buf[MAX_KEY/(sizeof(my_bitmap_map)*8)+1]; 04498 MY_BITMAP covered_fields; 04499 if (bitmap_init(&covered_fields, int_buf, param->table->s->fields, FALSE)) 04500 DBUG_RETURN(0); 04501 bitmap_clear_all(&covered_fields); 04502 04503 double total_cost= 0.0f; 04504 ha_rows records=0; 04505 bool all_covered; 04506 04507 DBUG_PRINT("info", ("Building covering ROR-intersection")); 04508 DBUG_EXECUTE("info", print_ror_scans_arr(param->table, 04509 "building covering ROR-I", 04510 ror_scan_mark, ror_scans_end);); 04511 do 04512 { 04513 /* 04514 Update changed sorting info: 04515 #covered fields, 04516 number of first not covered component 04517 Calculate and save these values for each of remaining scans. 04518 */ 04519 for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan) 04520 { 04521 bitmap_subtract(&(*scan)->covered_fields, &covered_fields); 04522 (*scan)->used_fields_covered= 04523 bitmap_bits_set(&(*scan)->covered_fields); 04524 (*scan)->first_uncovered_field= 04525 bitmap_get_first(&(*scan)->covered_fields); 04526 } 04527 04528 qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*), 04529 (qsort_cmp)cmp_ror_scan_info_covering); 04530 04531 DBUG_EXECUTE("info", print_ror_scans_arr(param->table, 04532 "remaining scans", 04533 ror_scan_mark, ror_scans_end);); 04534 04535 /* I=I-first(I) */ 04536 total_cost += (*ror_scan_mark)->index_read_cost; 04537 records += (*ror_scan_mark)->records; 04538 DBUG_PRINT("info", ("Adding scan on %s", 04539 param->table->key_info[(*ror_scan_mark)->keynr].name)); 04540 if (total_cost > read_time) 04541 DBUG_RETURN(NULL); 04542 /* F=F-covered by first(I) */ 04543 bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields); 04544 all_covered= bitmap_is_subset(¶m->needed_fields, &covered_fields); 04545 } while ((++ror_scan_mark < ror_scans_end) && !all_covered); 
04546 04547 if (!all_covered || (ror_scan_mark - tree->ror_scans) == 1) 04548 DBUG_RETURN(NULL); 04549 04550 /* 04551 Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with 04552 cost total_cost. 04553 */ 04554 DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost)); 04555 DBUG_EXECUTE("info", print_ror_scans_arr(param->table, 04556 "creating covering ROR-intersect", 04557 tree->ror_scans, ror_scan_mark);); 04558 04559 /* Add priority queue use cost. */ 04560 total_cost += rows2double(records)* 04561 log((double)(ror_scan_mark - tree->ror_scans)) / 04562 (TIME_FOR_COMPARE_ROWID * M_LN2); 04563 DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost)); 04564 04565 if (total_cost > read_time) 04566 DBUG_RETURN(NULL); 04567 04568 TRP_ROR_INTERSECT *trp; 04569 if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) 04570 DBUG_RETURN(trp); 04571 uint best_num= (ror_scan_mark - tree->ror_scans); 04572 if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root, 04573 sizeof(ROR_SCAN_INFO*)* 04574 best_num))) 04575 DBUG_RETURN(NULL); 04576 memcpy(trp->first_scan, tree->ror_scans, best_num*sizeof(ROR_SCAN_INFO*)); 04577 trp->last_scan= trp->first_scan + best_num; 04578 trp->is_covering= TRUE; 04579 trp->read_cost= total_cost; 04580 trp->records= records; 04581 trp->cpk_scan= NULL; 04582 set_if_smaller(param->table->quick_condition_rows, records); 04583 04584 DBUG_PRINT("info", 04585 ("Returning covering ROR-intersect plan: cost %g, records %lu", 04586 trp->read_cost, (ulong) trp->records)); 04587 DBUG_RETURN(trp); 04588 } 04589 04590 04591 /* 04592 Get best "range" table read plan for given SEL_TREE. 04593 Also update PARAM members and store ROR scans info in the SEL_TREE. 
04594 SYNOPSIS 04595 get_key_scans_params 04596 param parameters from test_quick_select 04597 tree make range select for this SEL_TREE 04598 index_read_must_be_used if TRUE, assume 'index only' option will be set 04599 (except for clustered PK indexes) 04600 read_time don't create read plans with cost > read_time. 04601 RETURN 04602 Best range read plan 04603 NULL if no plan found or error occurred 04604 */ 04605 04606 static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, 04607 bool index_read_must_be_used, 04608 bool update_tbl_stats, 04609 double read_time) 04610 { 04611 int idx; 04612 SEL_ARG **key,**end, **key_to_read= NULL; 04613 ha_rows best_records; 04614 TRP_RANGE* read_plan= NULL; 04615 bool pk_is_clustered= param->table->file->primary_key_is_clustered(); 04616 DBUG_ENTER("get_key_scans_params"); 04617 LINT_INIT(best_records); /* protected by key_to_read */ 04618 /* 04619 Note that there may be trees that have type SEL_TREE::KEY but contain no 04620 key reads at all, e.g. tree for expression "key1 is not null" where key1 04621 is defined as "not null". 04622 */ 04623 DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map, 04624 "tree scans");); 04625 tree->ror_scans_map.clear_all(); 04626 tree->n_ror_scans= 0; 04627 for (idx= 0,key=tree->keys, end=key+param->keys; 04628 key != end ; 04629 key++,idx++) 04630 { 04631 ha_rows found_records; 04632 double found_read_time; 04633 if (*key) 04634 { 04635 uint keynr= param->real_keynr[idx]; 04636 if ((*key)->type == SEL_ARG::MAYBE_KEY || 04637 (*key)->maybe_flag) 04638 param->needed_reg->set_bit(keynr); 04639 04640 bool read_index_only= index_read_must_be_used ? 
TRUE : 04641 (bool) param->table->used_keys.is_set(keynr); 04642 04643 found_records= check_quick_select(param, idx, *key, update_tbl_stats); 04644 if (param->is_ror_scan) 04645 { 04646 tree->n_ror_scans++; 04647 tree->ror_scans_map.set_bit(idx); 04648 } 04649 double cpu_cost= (double) found_records / TIME_FOR_COMPARE; 04650 if (found_records != HA_POS_ERROR && found_records > 2 && 04651 read_index_only && 04652 (param->table->file->index_flags(keynr, param->max_key_part,1) & 04653 HA_KEYREAD_ONLY) && 04654 !(pk_is_clustered && keynr == param->table->s->primary_key)) 04655 { 04656 /* 04657 We can resolve this by only reading through this key. 04658 0.01 is added to avoid races between range and 'index' scan. 04659 */ 04660 found_read_time= get_index_only_read_time(param,found_records,keynr) + 04661 cpu_cost + 0.01; 04662 } 04663 else 04664 { 04665 /* 04666 cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks) 04667 The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function. 04668 */ 04669 found_read_time= param->table->file->read_time(keynr, 04670 param->range_count, 04671 found_records) + 04672 cpu_cost + 0.01; 04673 } 04674 DBUG_PRINT("info",("key %s: found_read_time: %g (cur. 
read_time: %g)", 04675 param->table->key_info[keynr].name, found_read_time, 04676 read_time)); 04677 04678 if (read_time > found_read_time && found_records != HA_POS_ERROR 04679 /*|| read_time == DBL_MAX*/ ) 04680 { 04681 read_time= found_read_time; 04682 best_records= found_records; 04683 key_to_read= key; 04684 } 04685 04686 } 04687 } 04688 04689 DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map, 04690 "ROR scans");); 04691 if (key_to_read) 04692 { 04693 idx= key_to_read - tree->keys; 04694 if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx))) 04695 { 04696 read_plan->records= best_records; 04697 read_plan->is_ror= tree->ror_scans_map.is_set(idx); 04698 read_plan->read_cost= read_time; 04699 DBUG_PRINT("info", 04700 ("Returning range plan for key %s, cost %g, records %lu", 04701 param->table->key_info[param->real_keynr[idx]].name, 04702 read_plan->read_cost, (ulong) read_plan->records)); 04703 } 04704 } 04705 else 04706 DBUG_PRINT("info", ("No 'range' table read plan found")); 04707 04708 DBUG_RETURN(read_plan); 04709 } 04710 04711 04712 QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param, 04713 bool retrieve_full_rows, 04714 MEM_ROOT *parent_alloc) 04715 { 04716 QUICK_INDEX_MERGE_SELECT *quick_imerge; 04717 QUICK_RANGE_SELECT *quick; 04718 /* index_merge always retrieves full rows, ignore retrieve_full_rows */ 04719 if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table))) 04720 return NULL; 04721 04722 quick_imerge->records= records; 04723 quick_imerge->read_time= read_cost; 04724 for (TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end; 04725 range_scan++) 04726 { 04727 if (!(quick= (QUICK_RANGE_SELECT*) 04728 ((*range_scan)->make_quick(param, FALSE, &quick_imerge->alloc)))|| 04729 quick_imerge->push_quick_back(quick)) 04730 { 04731 delete quick; 04732 delete quick_imerge; 04733 return NULL; 04734 } 04735 } 04736 return quick_imerge; 04737 } 04738 04739 QUICK_SELECT_I 
*TRP_ROR_INTERSECT::make_quick(PARAM *param, 04740 bool retrieve_full_rows, 04741 MEM_ROOT *parent_alloc) 04742 { 04743 QUICK_ROR_INTERSECT_SELECT *quick_intrsect; 04744 QUICK_RANGE_SELECT *quick; 04745 DBUG_ENTER("TRP_ROR_INTERSECT::make_quick"); 04746 MEM_ROOT *alloc; 04747 04748 if ((quick_intrsect= 04749 new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table, 04750 (retrieve_full_rows? (!is_covering) : 04751 FALSE), 04752 parent_alloc))) 04753 { 04754 DBUG_EXECUTE("info", print_ror_scans_arr(param->table, 04755 "creating ROR-intersect", 04756 first_scan, last_scan);); 04757 alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc; 04758 for (; first_scan != last_scan;++first_scan) 04759 { 04760 if (!(quick= get_quick_select(param, (*first_scan)->idx, 04761 (*first_scan)->sel_arg, alloc)) || 04762 quick_intrsect->push_quick_back(quick)) 04763 { 04764 delete quick_intrsect; 04765 DBUG_RETURN(NULL); 04766 } 04767 } 04768 if (cpk_scan) 04769 { 04770 if (!(quick= get_quick_select(param, cpk_scan->idx, 04771 cpk_scan->sel_arg, alloc))) 04772 { 04773 delete quick_intrsect; 04774 DBUG_RETURN(NULL); 04775 } 04776 quick->file= NULL; 04777 quick_intrsect->cpk_quick= quick; 04778 } 04779 quick_intrsect->records= records; 04780 quick_intrsect->read_time= read_cost; 04781 } 04782 DBUG_RETURN(quick_intrsect); 04783 } 04784 04785 04786 QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, 04787 bool retrieve_full_rows, 04788 MEM_ROOT *parent_alloc) 04789 { 04790 QUICK_ROR_UNION_SELECT *quick_roru; 04791 TABLE_READ_PLAN **scan; 04792 QUICK_SELECT_I *quick; 04793 DBUG_ENTER("TRP_ROR_UNION::make_quick"); 04794 /* 04795 It is impossible to construct a ROR-union that will not retrieve full 04796 rows, ignore retrieve_full_rows parameter. 
04797 */ 04798 if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table))) 04799 { 04800 for (scan= first_ror; scan != last_ror; scan++) 04801 { 04802 if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) || 04803 quick_roru->push_quick_back(quick)) 04804 DBUG_RETURN(NULL); 04805 } 04806 quick_roru->records= records; 04807 quick_roru->read_time= read_cost; 04808 } 04809 DBUG_RETURN(quick_roru); 04810 } 04811 04812 04813 /* 04814 Build a SEL_TREE for <> or NOT BETWEEN predicate 04815 04816 SYNOPSIS 04817 get_ne_mm_tree() 04818 param PARAM from SQL_SELECT::test_quick_select 04819 cond_func item for the predicate 04820 field field in the predicate 04821 lt_value constant that field should be smaller 04822 gt_value constant that field should be greaterr 04823 cmp_type compare type for the field 04824 04825 RETURN 04826 # Pointer to tree built tree 04827 0 on error 04828 */ 04829 04830 static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, 04831 Field *field, 04832 Item *lt_value, Item *gt_value, 04833 Item_result cmp_type) 04834 { 04835 SEL_TREE *tree; 04836 tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, 04837 lt_value, cmp_type); 04838 if (tree) 04839 { 04840 tree= tree_or(param, tree, get_mm_parts(param, cond_func, field, 04841 Item_func::GT_FUNC, 04842 gt_value, cmp_type)); 04843 } 04844 return tree; 04845 } 04846 04847 04848 /* 04849 Build a SEL_TREE for a simple predicate 04850 04851 SYNOPSIS 04852 get_func_mm_tree() 04853 param PARAM from SQL_SELECT::test_quick_select 04854 cond_func item for the predicate 04855 field field in the predicate 04856 value constant in the predicate 04857 cmp_type compare type for the field 04858 inv TRUE <> NOT cond_func is considered 04859 (makes sense only when cond_func is BETWEEN or IN) 04860 04861 RETURN 04862 Pointer to the tree built tree 04863 */ 04864 04865 static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, 04866 Field *field, Item 
*value, 04867 Item_result cmp_type, bool inv) 04868 { 04869 SEL_TREE *tree= 0; 04870 DBUG_ENTER("get_func_mm_tree"); 04871 04872 switch (cond_func->functype()) { 04873 04874 case Item_func::NE_FUNC: 04875 tree= get_ne_mm_tree(param, cond_func, field, value, value, cmp_type); 04876 break; 04877 04878 case Item_func::BETWEEN: 04879 if (inv) 04880 { 04881 tree= get_ne_mm_tree(param, cond_func, field, cond_func->arguments()[1], 04882 cond_func->arguments()[2], cmp_type); 04883 } 04884 else 04885 { 04886 tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC, 04887 cond_func->arguments()[1],cmp_type); 04888 if (tree) 04889 { 04890 tree= tree_and(param, tree, get_mm_parts(param, cond_func, field, 04891 Item_func::LE_FUNC, 04892 cond_func->arguments()[2], 04893 cmp_type)); 04894 } 04895 } 04896 break; 04897 04898 case Item_func::IN_FUNC: 04899 { 04900 Item_func_in *func=(Item_func_in*) cond_func; 04901 04902 if (inv) 04903 { 04904 if (func->array && func->cmp_type != ROW_RESULT) 04905 { 04906 /* 04907 We get here for conditions in form "t.key NOT IN (c1, c2, ...)" 04908 (where c{i} are constants). 04909 Our goal is to produce a SEL_ARG graph that represents intervals: 04910 04911 ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*) 04912 04913 where $MIN is either "-inf" or NULL. 04914 04915 The most straightforward way to handle NOT IN would be to convert 04916 it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range 04917 optimizer to build SEL_ARG graph from that. However that will cause 04918 the range optimizer to use O(N^2) memory (it's a bug, not filed), 04919 and people do use big NOT IN lists (see BUG#15872). Also, for big 04920 NOT IN lists constructing/using graph (*) does not make the query 04921 faster. 04922 04923 So, we will handle NOT IN manually in the following way: 04924 * if the number of entries in the NOT IN list is less then 04925 NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*) 04926 manually. 
04927 * Otherwise, we will construct a smaller graph: for 04928 "t.key NOT IN (c1,...cN)" we construct a graph representing 04929 ($MIN < t.key) OR (cN < t.key) // here sequence of c_i is 04930 // ordered. 04931 04932 A note about partially-covering indexes: for those (e.g. for 04933 "a CHAR(10), KEY(a(5))") the handling is correct (albeit not very 04934 efficient): 04935 Instead of "t.key < c1" we get "t.key <= prefix-val(c1)". 04936 Combining the intervals in (*) together, we get: 04937 (-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ... 04938 i.e. actually we get intervals combined into one interval: 04939 (-inf<=t.key<=+inf). This doesn't make much sense but it doesn't 04940 cause any problems. 04941 */ 04942 MEM_ROOT *tmp_root= param->mem_root; 04943 param->thd->mem_root= param->old_root; 04944 /* 04945 Create one Item_type constant object. We'll need it as 04946 get_mm_parts only accepts constant values wrapped in Item_Type 04947 objects. 04948 We create the Item on param->mem_root which points to 04949 per-statement mem_root (while thd->mem_root is currently pointing 04950 to mem_root local to range optimizer). 04951 */ 04952 Item *value_item= func->array->create_item(); 04953 param->thd->mem_root= tmp_root; 04954 04955 if (!value_item) 04956 break; 04957 04958 /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. 
*/ 04959 uint i=0; 04960 do 04961 { 04962 func->array->value_to_item(i, value_item); 04963 tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, 04964 value_item, cmp_type); 04965 if (!tree) 04966 break; 04967 i++; 04968 } while (i < func->array->count && tree->type == SEL_TREE::IMPOSSIBLE); 04969 04970 if (!tree || tree->type == SEL_TREE::IMPOSSIBLE) 04971 { 04972 /* We get here in cases like "t.unsigned NOT IN (-1,-2,-3) */ 04973 tree= NULL; 04974 break; 04975 } 04976 #define NOT_IN_IGNORE_THRESHOLD 1000 04977 SEL_TREE *tree2; 04978 if (func->array->count < NOT_IN_IGNORE_THRESHOLD) 04979 { 04980 for (; i < func->array->count; i++) 04981 { 04982 if (func->array->compare_elems(i, i-1)) 04983 { 04984 /* Get a SEL_TREE for "-inf < X < c_i" interval */ 04985 func->array->value_to_item(i, value_item); 04986 tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, 04987 value_item, cmp_type); 04988 if (!tree2) 04989 { 04990 tree= NULL; 04991 break; 04992 } 04993 04994 /* Change all intervals to be "c_{i-1} < X < c_i" */ 04995 for (uint idx= 0; idx < param->keys; idx++) 04996 { 04997 SEL_ARG *new_interval, *last_val; 04998 if (((new_interval= tree2->keys[idx])) && 04999 ((last_val= tree->keys[idx]->last()))) 05000 { 05001 new_interval->min_value= last_val->max_value; 05002 new_interval->min_flag= NEAR_MIN; 05003 } 05004 } 05005 /* 05006 The following doesn't try to allocate memory so no need to 05007 check for NULL. 
05008 */ 05009 tree= tree_or(param, tree, tree2); 05010 } 05011 } 05012 } 05013 else 05014 func->array->value_to_item(func->array->count - 1, value_item); 05015 05016 if (tree && tree->type != SEL_TREE::IMPOSSIBLE) 05017 { 05018 /* 05019 Get the SEL_TREE for the last "c_last < X < +inf" interval 05020 (value_item cotains c_last already) 05021 */ 05022 tree2= get_mm_parts(param, cond_func, field, Item_func::GT_FUNC, 05023 value_item, cmp_type); 05024 tree= tree_or(param, tree, tree2); 05025 } 05026 } 05027 else 05028 { 05029 tree= get_ne_mm_tree(param, cond_func, field, 05030 func->arguments()[1], func->arguments()[1], 05031 cmp_type); 05032 if (tree) 05033 { 05034 Item **arg, **end; 05035 for (arg= func->arguments()+2, end= arg+func->argument_count()-2; 05036 arg < end ; arg++) 05037 { 05038 tree= tree_and(param, tree, get_ne_mm_tree(param, cond_func, field, 05039 *arg, *arg, cmp_type)); 05040 } 05041 } 05042 } 05043 } 05044 else 05045 { 05046 tree= get_mm_parts(param, cond_func, field, Item_func::EQ_FUNC, 05047 func->arguments()[1], cmp_type); 05048 if (tree) 05049 { 05050 Item **arg, **end; 05051 for (arg= func->arguments()+2, end= arg+func->argument_count()-2; 05052 arg < end ; arg++) 05053 { 05054 tree= tree_or(param, tree, get_mm_parts(param, cond_func, field, 05055 Item_func::EQ_FUNC, 05056 *arg, cmp_type)); 05057 } 05058 } 05059 } 05060 break; 05061 } 05062 default: 05063 { 05064 /* 05065 Here the function for the following predicates are processed: 05066 <, <=, =, >=, >, LIKE, IS NULL, IS NOT NULL. 05067 If the predicate is of the form (value op field) it is handled 05068 as the equivalent predicate (field rev_op value), e.g. 05069 2 <= a is handled as a >= 2. 05070 */ 05071 Item_func::Functype func_type= 05072 (value != cond_func->arguments()[0]) ? 
cond_func->functype() : 05073 ((Item_bool_func2*) cond_func)->rev_functype(); 05074 tree= get_mm_parts(param, cond_func, field, func_type, value, cmp_type); 05075 } 05076 } 05077 05078 DBUG_RETURN(tree); 05079 05080 } 05081 05082 /* make a select tree of all keys in condition */ 05083 05084 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) 05085 { 05086 SEL_TREE *tree=0; 05087 SEL_TREE *ftree= 0; 05088 Item_field *field_item= 0; 05089 bool inv= FALSE; 05090 Item *value; 05091 DBUG_ENTER("get_mm_tree"); 05092 05093 if (cond->type() == Item::COND_ITEM) 05094 { 05095 List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); 05096 05097 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) 05098 { 05099 tree=0; 05100 Item *item; 05101 while ((item=li++)) 05102 { 05103 SEL_TREE *new_tree=get_mm_tree(param,item); 05104 if (param->thd->is_fatal_error) 05105 DBUG_RETURN(0); // out of memory 05106 tree=tree_and(param,tree,new_tree); 05107 if (tree && tree->type == SEL_TREE::IMPOSSIBLE) 05108 break; 05109 } 05110 } 05111 else 05112 { // COND OR 05113 tree=get_mm_tree(param,li++); 05114 if (tree) 05115 { 05116 Item *item; 05117 while ((item=li++)) 05118 { 05119 SEL_TREE *new_tree=get_mm_tree(param,item); 05120 if (!new_tree) 05121 DBUG_RETURN(0); // out of memory 05122 tree=tree_or(param,tree,new_tree); 05123 if (!tree || tree->type == SEL_TREE::ALWAYS) 05124 break; 05125 } 05126 } 05127 } 05128 DBUG_RETURN(tree); 05129 } 05130 /* Here when simple cond */ 05131 if (cond->const_item()) 05132 { 05133 /* 05134 During the cond->val_int() evaluation we can come across a subselect 05135 item which may allocate memory on the thd->mem_root and assumes 05136 all the memory allocated has the same life span as the subselect 05137 item itself. So we have to restore the thread's mem_root here. 05138 */ 05139 MEM_ROOT *tmp_root= param->mem_root; 05140 param->thd->mem_root= param->old_root; 05141 tree= cond->val_int() ? 
new(tmp_root) SEL_TREE(SEL_TREE::ALWAYS) : 05142 new(tmp_root) SEL_TREE(SEL_TREE::IMPOSSIBLE); 05143 param->thd->mem_root= tmp_root; 05144 DBUG_RETURN(tree); 05145 } 05146 05147 table_map ref_tables= 0; 05148 table_map param_comp= ~(param->prev_tables | param->read_tables | 05149 param->current_table); 05150 if (cond->type() != Item::FUNC_ITEM) 05151 { // Should be a field 05152 ref_tables= cond->used_tables(); 05153 if ((ref_tables & param->current_table) || 05154 (ref_tables & ~(param->prev_tables | param->read_tables))) 05155 DBUG_RETURN(0); 05156 DBUG_RETURN(new SEL_TREE(SEL_TREE::MAYBE)); 05157 } 05158 05159 Item_func *cond_func= (Item_func*) cond; 05160 if (cond_func->functype() == Item_func::BETWEEN || 05161 cond_func->functype() == Item_func::IN_FUNC) 05162 inv= ((Item_func_opt_neg *) cond_func)->negated; 05163 else if (cond_func->select_optimize() == Item_func::OPTIMIZE_NONE) 05164 DBUG_RETURN(0); 05165 05166 param->cond= cond; 05167 05168 switch (cond_func->functype()) { 05169 case Item_func::BETWEEN: 05170 if (cond_func->arguments()[0]->real_item()->type() != Item::FIELD_ITEM) 05171 DBUG_RETURN(0); 05172 field_item= (Item_field*) (cond_func->arguments()[0]->real_item()); 05173 value= NULL; 05174 break; 05175 case Item_func::IN_FUNC: 05176 { 05177 Item_func_in *func=(Item_func_in*) cond_func; 05178 if (func->key_item()->real_item()->type() != Item::FIELD_ITEM) 05179 DBUG_RETURN(0); 05180 field_item= (Item_field*) (func->key_item()->real_item()); 05181 value= NULL; 05182 break; 05183 } 05184 case Item_func::MULT_EQUAL_FUNC: 05185 { 05186 Item_equal *item_equal= (Item_equal *) cond; 05187 if (!(value= item_equal->get_const())) 05188 DBUG_RETURN(0); 05189 Item_equal_iterator it(*item_equal); 05190 ref_tables= value->used_tables(); 05191 while ((field_item= it++)) 05192 { 05193 Field *field= field_item->field; 05194 Item_result cmp_type= field->cmp_type(); 05195 if (!((ref_tables | field->table->map) & param_comp)) 05196 { 05197 tree= get_mm_parts(param, 
cond, field, Item_func::EQ_FUNC, 05198 value,cmp_type); 05199 ftree= !ftree ? tree : tree_and(param, ftree, tree); 05200 } 05201 } 05202 05203 DBUG_RETURN(ftree); 05204 } 05205 default: 05206 if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM) 05207 { 05208 field_item= (Item_field*) (cond_func->arguments()[0]->real_item()); 05209 value= cond_func->arg_count > 1 ? cond_func->arguments()[1] : 0; 05210 } 05211 else if (cond_func->have_rev_func() && 05212 cond_func->arguments()[1]->real_item()->type() == 05213 Item::FIELD_ITEM) 05214 { 05215 field_item= (Item_field*) (cond_func->arguments()[1]->real_item()); 05216 value= cond_func->arguments()[0]; 05217 } 05218 else 05219 DBUG_RETURN(0); 05220 } 05221 05222 /* 05223 If the where condition contains a predicate (ti.field op const), 05224 then not only SELL_TREE for this predicate is built, but 05225 the trees for the results of substitution of ti.field for 05226 each tj.field belonging to the same multiple equality as ti.field 05227 are built as well. 05228 E.g. for WHERE t1.a=t2.a AND t2.a > 10 05229 a SEL_TREE for t2.a > 10 will be built for quick select from t2 05230 and 05231 a SEL_TREE for t1.a > 10 will be built for quick select from t1. 
05232 */ 05233 05234 for (uint i= 0; i < cond_func->arg_count; i++) 05235 { 05236 Item *arg= cond_func->arguments()[i]->real_item(); 05237 if (arg != field_item) 05238 ref_tables|= arg->used_tables(); 05239 } 05240 Field *field= field_item->field; 05241 Item_result cmp_type= field->cmp_type(); 05242 if (!((ref_tables | field->table->map) & param_comp)) 05243 ftree= get_func_mm_tree(param, cond_func, field, value, cmp_type, inv); 05244 Item_equal *item_equal= field_item->item_equal; 05245 if (item_equal) 05246 { 05247 Item_equal_iterator it(*item_equal); 05248 Item_field *item; 05249 while ((item= it++)) 05250 { 05251 Field *f= item->field; 05252 if (field->eq(f)) 05253 continue; 05254 if (!((ref_tables | f->table->map) & param_comp)) 05255 { 05256 tree= get_func_mm_tree(param, cond_func, f, value, cmp_type, inv); 05257 ftree= !ftree ? tree : tree_and(param, ftree, tree); 05258 } 05259 } 05260 } 05261 DBUG_RETURN(ftree); 05262 } 05263 05264 05265 static SEL_TREE * 05266 get_mm_parts(RANGE_OPT_PARAM *param, COND *cond_func, Field *field, 05267 Item_func::Functype type, 05268 Item *value, Item_result cmp_type) 05269 { 05270 DBUG_ENTER("get_mm_parts"); 05271 if (field->table != param->table) 05272 DBUG_RETURN(0); 05273 05274 KEY_PART *key_part = param->key_parts; 05275 KEY_PART *end = param->key_parts_end; 05276 SEL_TREE *tree=0; 05277 if (value && 05278 value->used_tables() & ~(param->prev_tables | param->read_tables)) 05279 DBUG_RETURN(0); 05280 for (; key_part != end ; key_part++) 05281 { 05282 if (field->eq(key_part->field)) 05283 { 05284 SEL_ARG *sel_arg=0; 05285 if (!tree && !(tree=new SEL_TREE())) 05286 DBUG_RETURN(0); // OOM 05287 if (!value || !(value->used_tables() & ~param->read_tables)) 05288 { 05289 sel_arg=get_mm_leaf(param,cond_func, 05290 key_part->field,key_part,type,value); 05291 if (!sel_arg) 05292 continue; 05293 if (sel_arg->type == SEL_ARG::IMPOSSIBLE) 05294 { 05295 tree->type=SEL_TREE::IMPOSSIBLE; 05296 DBUG_RETURN(tree); 05297 } 05298 } 05299 
else 05300 { 05301 // This key may be used later 05302 if (!(sel_arg= new SEL_ARG(SEL_ARG::MAYBE_KEY))) 05303 DBUG_RETURN(0); // OOM 05304 } 05305 sel_arg->part=(uchar) key_part->part; 05306 tree->keys[key_part->key]=sel_add(tree->keys[key_part->key],sel_arg); 05307 tree->keys_map.set_bit(key_part->key); 05308 } 05309 } 05310 05311 DBUG_RETURN(tree); 05312 } 05313 05314 05315 static SEL_ARG * 05316 get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, 05317 KEY_PART *key_part, Item_func::Functype type,Item *value) 05318 { 05319 uint maybe_null=(uint) field->real_maybe_null(); 05320 bool optimize_range; 05321 SEL_ARG *tree= 0; 05322 MEM_ROOT *alloc= param->mem_root; 05323 char *str; 05324 ulong orig_sql_mode; 05325 DBUG_ENTER("get_mm_leaf"); 05326 05327 /* 05328 We need to restore the runtime mem_root of the thread in this 05329 function because it evaluates the value of its argument, while 05330 the argument can be any, e.g. a subselect. The subselect 05331 items, in turn, assume that all the memory allocated during 05332 the evaluation has the same life span as the item itself. 05333 TODO: opt_range.cc should not reset thd->mem_root at all. 05334 */ 05335 param->thd->mem_root= param->old_root; 05336 if (!value) // IS NULL or IS NOT NULL 05337 { 05338 if (field->table->maybe_null) // Can't use a key on this 05339 goto end; 05340 if (!maybe_null) // Not null field 05341 { 05342 if (type == Item_func::ISNULL_FUNC) 05343 tree= &null_element; 05344 goto end; 05345 } 05346 if (!(tree= new (alloc) SEL_ARG(field,is_null_string,is_null_string))) 05347 goto end; // out of memory 05348 if (type == Item_func::ISNOTNULL_FUNC) 05349 { 05350 tree->min_flag=NEAR_MIN; /* IS NOT NULL -> X > NULL */ 05351 tree->max_flag=NO_MAX_RANGE; 05352 } 05353 goto end; 05354 } 05355 05356 /* 05357 1. Usually we can't use an index if the column collation 05358 differ from the operation collation. 05359 05360 2. 
However, we can reuse a case insensitive index for 05361 the binary searches: 05362 05363 WHERE latin1_swedish_ci_column = 'a' COLLATE lati1_bin; 05364 05365 WHERE latin1_swedish_ci_colimn = BINARY 'a ' 05366 05367 */ 05368 if (field->result_type() == STRING_RESULT && 05369 value->result_type() == STRING_RESULT && 05370 key_part->image_type == Field::itRAW && 05371 ((Field_str*)field)->charset() != conf_func->compare_collation() && 05372 !(conf_func->compare_collation()->state & MY_CS_BINSORT)) 05373 goto end; 05374 05375 if (param->using_real_indexes) 05376 optimize_range= field->optimize_range(param->real_keynr[key_part->key], 05377 key_part->part); 05378 else 05379 optimize_range= TRUE; 05380 05381 if (type == Item_func::LIKE_FUNC) 05382 { 05383 bool like_error; 05384 char buff1[MAX_FIELD_WIDTH],*min_str,*max_str; 05385 String tmp(buff1,sizeof(buff1),value->collation.collation),*res; 05386 uint length,offset,min_length,max_length; 05387 uint field_length= field->pack_length()+maybe_null; 05388 05389 if (!optimize_range) 05390 goto end; 05391 if (!(res= value->val_str(&tmp))) 05392 { 05393 tree= &null_element; 05394 goto end; 05395 } 05396 05397 /* 05398 TODO: 05399 Check if this was a function. 
This should have be optimized away 05400 in the sql_select.cc 05401 */ 05402 if (res != &tmp) 05403 { 05404 tmp.copy(*res); // Get own copy 05405 res= &tmp; 05406 } 05407 if (field->cmp_type() != STRING_RESULT) 05408 goto end; // Can only optimize strings 05409 05410 offset=maybe_null; 05411 length=key_part->store_length; 05412 05413 if (length != key_part->length + maybe_null) 05414 { 05415 /* key packed with length prefix */ 05416 offset+= HA_KEY_BLOB_LENGTH; 05417 field_length= length - HA_KEY_BLOB_LENGTH; 05418 } 05419 else 05420 { 05421 if (unlikely(length < field_length)) 05422 { 05423 /* 05424 This can only happen in a table created with UNIREG where one key 05425 overlaps many fields 05426 */ 05427 length= field_length; 05428 } 05429 else 05430 field_length= length; 05431 } 05432 length+=offset; 05433 if (!(min_str= (char*) alloc_root(alloc, length*2))) 05434 goto end; 05435 05436 max_str=min_str+length; 05437 if (maybe_null) 05438 max_str[0]= min_str[0]=0; 05439 05440 field_length-= maybe_null; 05441 like_error= my_like_range(field->charset(), 05442 res->ptr(), res->length(), 05443 ((Item_func_like*)(param->cond))->escape, 05444 wild_one, wild_many, 05445 field_length, 05446 min_str+offset, max_str+offset, 05447 &min_length, &max_length); 05448 if (like_error) // Can't optimize with LIKE 05449 goto end; 05450 05451 if (offset != maybe_null) // BLOB or VARCHAR 05452 { 05453 int2store(min_str+maybe_null,min_length); 05454 int2store(max_str+maybe_null,max_length); 05455 } 05456 tree= new (alloc) SEL_ARG(field, min_str, max_str); 05457 goto end; 05458 } 05459 05460 if (!optimize_range && 05461 type != Item_func::EQ_FUNC && 05462 type != Item_func::EQUAL_FUNC) 05463 goto end; // Can't optimize this 05464 05465 /* 05466 We can't always use indexes when comparing a string index to a number 05467 cmp_type() is checked to allow compare of dates to numbers 05468 */ 05469 if (field->result_type() == STRING_RESULT && 05470 value->result_type() != STRING_RESULT && 
05471 field->cmp_type() != value->result_type()) 05472 goto end; 05473 /* For comparison purposes allow invalid dates like 2000-01-32 */ 05474 orig_sql_mode= field->table->in_use->variables.sql_mode; 05475 if (value->real_item()->type() == Item::STRING_ITEM && 05476 (field->type() == FIELD_TYPE_DATE || 05477 field->type() == FIELD_TYPE_DATETIME)) 05478 field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES; 05479 if (value->save_in_field_no_warnings(field, 1) < 0) 05480 { 05481 field->table->in_use->variables.sql_mode= orig_sql_mode; 05482 /* This happens when we try to insert a NULL field in a not null column */ 05483 tree= &null_element; // cmp with NULL is never TRUE 05484 goto end; 05485 } 05486 field->table->in_use->variables.sql_mode= orig_sql_mode; 05487 str= (char*) alloc_root(alloc, key_part->store_length+1); 05488 if (!str) 05489 goto end; 05490 if (maybe_null) 05491 *str= (char) field->is_real_null(); // Set to 1 if null 05492 field->get_key_image(str+maybe_null, key_part->length, key_part->image_type); 05493 if (!(tree= new (alloc) SEL_ARG(field, str, str))) 05494 goto end; // out of memory 05495 05496 /* 05497 Check if we are comparing an UNSIGNED integer with a negative constant. 05498 In this case we know that: 05499 (a) (unsigned_int [< | <=] negative_constant) == FALSE 05500 (b) (unsigned_int [> | >=] negative_constant) == TRUE 05501 In case (a) the condition is false for all values, and in case (b) it 05502 is true for all values, so we can avoid unnecessary retrieval and condition 05503 testing, and we also get correct comparison of unsinged integers with 05504 negative integers (which otherwise fails because at query execution time 05505 negative integers are cast to unsigned if compared with unsigned). 
05506 */ 05507 if (field->result_type() == INT_RESULT && 05508 value->result_type() == INT_RESULT && 05509 ((Field_num*)field)->unsigned_flag && !((Item_int*)value)->unsigned_flag) 05510 { 05511 longlong item_val= value->val_int(); 05512 if (item_val < 0) 05513 { 05514 if (type == Item_func::LT_FUNC || type == Item_func::LE_FUNC) 05515 { 05516 tree->type= SEL_ARG::IMPOSSIBLE; 05517 goto end; 05518 } 05519 if (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC) 05520 { 05521 tree= 0; 05522 goto end; 05523 } 05524 } 05525 } 05526 05527 switch (type) { 05528 case Item_func::LT_FUNC: 05529 if (field_is_equal_to_item(field,value)) 05530 tree->max_flag=NEAR_MAX; 05531 /* fall through */ 05532 case Item_func::LE_FUNC: 05533 if (!maybe_null) 05534 tree->min_flag=NO_MIN_RANGE; /* From start */ 05535 else 05536 { // > NULL 05537 tree->min_value=is_null_string; 05538 tree->min_flag=NEAR_MIN; 05539 } 05540 break; 05541 case Item_func::GT_FUNC: 05542 if (field_is_equal_to_item(field,value)) 05543 tree->min_flag=NEAR_MIN; 05544 /* fall through */ 05545 case Item_func::GE_FUNC: 05546 tree->max_flag=NO_MAX_RANGE; 05547 break; 05548 case Item_func::SP_EQUALS_FUNC: 05549 tree->min_flag=GEOM_FLAG | HA_READ_MBR_EQUAL;// NEAR_MIN;//512; 05550 tree->max_flag=NO_MAX_RANGE; 05551 break; 05552 case Item_func::SP_DISJOINT_FUNC: 05553 tree->min_flag=GEOM_FLAG | HA_READ_MBR_DISJOINT;// NEAR_MIN;//512; 05554 tree->max_flag=NO_MAX_RANGE; 05555 break; 05556 case Item_func::SP_INTERSECTS_FUNC: 05557 tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512; 05558 tree->max_flag=NO_MAX_RANGE; 05559 break; 05560 case Item_func::SP_TOUCHES_FUNC: 05561 tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512; 05562 tree->max_flag=NO_MAX_RANGE; 05563 break; 05564 05565 case Item_func::SP_CROSSES_FUNC: 05566 tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512; 05567 tree->max_flag=NO_MAX_RANGE; 05568 break; 05569 case Item_func::SP_WITHIN_FUNC: 05570 
tree->min_flag=GEOM_FLAG | HA_READ_MBR_WITHIN;// NEAR_MIN;//512; 05571 tree->max_flag=NO_MAX_RANGE; 05572 break; 05573 05574 case Item_func::SP_CONTAINS_FUNC: 05575 tree->min_flag=GEOM_FLAG | HA_READ_MBR_CONTAIN;// NEAR_MIN;//512; 05576 tree->max_flag=NO_MAX_RANGE; 05577 break; 05578 case Item_func::SP_OVERLAPS_FUNC: 05579 tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512; 05580 tree->max_flag=NO_MAX_RANGE; 05581 break; 05582 05583 default: 05584 break; 05585 } 05586 05587 end: 05588 param->thd->mem_root= alloc; 05589 DBUG_RETURN(tree); 05590 } 05591 05592 05593 /****************************************************************************** 05594 ** Tree manipulation functions 05595 ** If tree is 0 it means that the condition can't be tested. It refers 05596 ** to a non existent table or to a field in current table with isn't a key. 05597 ** The different tree flags: 05598 ** IMPOSSIBLE: Condition is never TRUE 05599 ** ALWAYS: Condition is always TRUE 05600 ** MAYBE: Condition may exists when tables are read 05601 ** MAYBE_KEY: Condition refers to a key that may be used in join loop 05602 ** KEY_RANGE: Condition uses a key 05603 ******************************************************************************/ 05604 05605 /* 05606 Add a new key test to a key when scanning through all keys 05607 This will never be called for same key parts. 05608 */ 05609 05610 static SEL_ARG * 05611 sel_add(SEL_ARG *key1,SEL_ARG *key2) 05612 { 05613 SEL_ARG *root,**key_link; 05614 05615 if (!key1) 05616 return key2; 05617 if (!key2) 05618 return key1; 05619 05620 key_link= &root; 05621 while (key1 && key2) 05622 { 05623 if (key1->part < key2->part) 05624 { 05625 *key_link= key1; 05626 key_link= &key1->next_key_part; 05627 key1=key1->next_key_part; 05628 } 05629 else 05630 { 05631 *key_link= key2; 05632 key_link= &key2->next_key_part; 05633 key2=key2->next_key_part; 05634 } 05635 } 05636 *key_link=key1 ? 
/*
  Flags passed to key_and()/and_all_keys() as 'clone_flag'; tree_and() sets
  them when the corresponding key is not a simple key.
*/
#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
/*
  Exchange the KEY1 and KEY2 bits of a clone flag (used when key1 and key2
  are swapped). Both the argument and the whole expansion are parenthesised
  so the macro can be used safely inside a larger expression.
*/
#define swap_clone_flag(A) \
  ((((A) & CLONE_KEY1_MAYBE) << 1) | (((A) & CLONE_KEY2_MAYBE) >> 1))
imerge_list_and_list(&tree1->merges, &tree2->merges); 05706 DBUG_RETURN(tree1); 05707 } 05708 05709 05710 /* 05711 Check if two SEL_TREES can be combined into one (i.e. a single key range 05712 read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without 05713 using index_merge. 05714 */ 05715 05716 bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, 05717 RANGE_OPT_PARAM* param) 05718 { 05719 key_map common_keys= tree1->keys_map; 05720 DBUG_ENTER("sel_trees_can_be_ored"); 05721 common_keys.intersect(tree2->keys_map); 05722 05723 if (common_keys.is_clear_all()) 05724 DBUG_RETURN(FALSE); 05725 05726 /* trees have a common key, check if they refer to same key part */ 05727 SEL_ARG **key1,**key2; 05728 for (uint key_no=0; key_no < param->keys; key_no++) 05729 { 05730 if (common_keys.is_set(key_no)) 05731 { 05732 key1= tree1->keys + key_no; 05733 key2= tree2->keys + key_no; 05734 if ((*key1)->part == (*key2)->part) 05735 { 05736 DBUG_RETURN(TRUE); 05737 } 05738 } 05739 } 05740 DBUG_RETURN(FALSE); 05741 } 05742 05743 05744 /* 05745 Remove the trees that are not suitable for record retrieval. 05746 SYNOPSIS 05747 param Range analysis parameter 05748 tree Tree to be processed, tree->type is KEY or KEY_SMALLER 05749 05750 DESCRIPTION 05751 This function walks through tree->keys[] and removes the SEL_ARG* trees 05752 that are not "maybe" trees (*) and cannot be used to construct quick range 05753 selects. 05754 (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of 05755 these types here as well. 05756 05757 A SEL_ARG* tree cannot be used to construct quick select if it has 05758 tree->part != 0. (e.g. it could represent "keypart2 < const"). 05759 05760 WHY THIS FUNCTION IS NEEDED 05761 05762 Normally we allow construction of SEL_TREE objects that have SEL_ARG 05763 trees that do not allow quick range select construction. 
For example for 05764 " keypart1=1 AND keypart2=2 " the execution will proceed as follows: 05765 tree1= SEL_TREE { SEL_ARG{keypart1=1} } 05766 tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select 05767 from this 05768 call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG 05769 tree. 05770 05771 There is an exception though: when we construct index_merge SEL_TREE, 05772 any SEL_ARG* tree that cannot be used to construct quick range select can 05773 be removed, because current range analysis code doesn't provide any way 05774 that tree could be later combined with another tree. 05775 Consider an example: we should not construct 05776 st1 = SEL_TREE { 05777 merges = SEL_IMERGE { 05778 SEL_TREE(t.key1part1 = 1), 05779 SEL_TREE(t.key2part2 = 2) -- (*) 05780 } 05781 }; 05782 because 05783 - (*) cannot be used to construct quick range select, 05784 - There is no execution path that would cause (*) to be converted to 05785 a tree that could be used. 05786 05787 The latter is easy to verify: first, notice that the only way to convert 05788 (*) into a usable tree is to call tree_and(something, (*)). 05789 05790 Second look at what tree_and/tree_or function would do when passed a 05791 SEL_TREE that has the structure like st1 tree has, and conlcude that 05792 tree_and(something, (*)) will not be called. 05793 05794 RETURN 05795 0 Ok, some suitable trees left 05796 1 No tree->keys[] left. 
05797 */ 05798 05799 static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree) 05800 { 05801 bool res= FALSE; 05802 for (uint i=0; i < param->keys; i++) 05803 { 05804 if (tree->keys[i]) 05805 { 05806 if (tree->keys[i]->part) 05807 { 05808 tree->keys[i]= NULL; 05809 tree->keys_map.clear_bit(i); 05810 } 05811 else 05812 res= TRUE; 05813 } 05814 } 05815 return !res; 05816 } 05817 05818 05819 static SEL_TREE * 05820 tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) 05821 { 05822 DBUG_ENTER("tree_or"); 05823 if (!tree1 || !tree2) 05824 DBUG_RETURN(0); 05825 if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS) 05826 DBUG_RETURN(tree2); 05827 if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS) 05828 DBUG_RETURN(tree1); 05829 if (tree1->type == SEL_TREE::MAYBE) 05830 DBUG_RETURN(tree1); // Can't use this 05831 if (tree2->type == SEL_TREE::MAYBE) 05832 DBUG_RETURN(tree2); 05833 05834 SEL_TREE *result= 0; 05835 key_map result_keys; 05836 result_keys.clear_all(); 05837 if (sel_trees_can_be_ored(tree1, tree2, param)) 05838 { 05839 /* Join the trees key per key */ 05840 SEL_ARG **key1,**key2,**end; 05841 for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ; 05842 key1 != end ; key1++,key2++) 05843 { 05844 *key1=key_or(*key1,*key2); 05845 if (*key1) 05846 { 05847 result=tree1; // Added to tree1 05848 result_keys.set_bit(key1 - tree1->keys); 05849 #ifdef EXTRA_DEBUG 05850 (*key1)->test_use_count(*key1); 05851 #endif 05852 } 05853 } 05854 if (result) 05855 result->keys_map= result_keys; 05856 } 05857 else 05858 { 05859 /* ok, two trees have KEY type but cannot be used without index merge */ 05860 if (tree1->merges.is_empty() && tree2->merges.is_empty()) 05861 { 05862 if (param->remove_jump_scans) 05863 { 05864 bool no_trees= remove_nonrange_trees(param, tree1); 05865 no_trees= no_trees || remove_nonrange_trees(param, tree2); 05866 if (no_trees) 05867 DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS)); 
05868 } 05869 SEL_IMERGE *merge; 05870 /* both trees are "range" trees, produce new index merge structure */ 05871 if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) || 05872 (result->merges.push_back(merge)) || 05873 (merge->or_sel_tree(param, tree1)) || 05874 (merge->or_sel_tree(param, tree2))) 05875 result= NULL; 05876 else 05877 result->type= tree1->type; 05878 } 05879 else if (!tree1->merges.is_empty() && !tree2->merges.is_empty()) 05880 { 05881 if (imerge_list_or_list(param, &tree1->merges, &tree2->merges)) 05882 result= new SEL_TREE(SEL_TREE::ALWAYS); 05883 else 05884 result= tree1; 05885 } 05886 else 05887 { 05888 /* one tree is index merge tree and another is range tree */ 05889 if (tree1->merges.is_empty()) 05890 swap_variables(SEL_TREE*, tree1, tree2); 05891 05892 if (param->remove_jump_scans && remove_nonrange_trees(param, tree2)) 05893 DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS)); 05894 /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */ 05895 if (imerge_list_or_tree(param, &tree1->merges, tree2)) 05896 result= new SEL_TREE(SEL_TREE::ALWAYS); 05897 else 05898 result= tree1; 05899 } 05900 } 05901 DBUG_RETURN(result); 05902 } 05903 05904 05905 /* And key trees where key1->part < key2 -> part */ 05906 05907 static SEL_ARG * 05908 and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag) 05909 { 05910 SEL_ARG *next; 05911 ulong use_count=key1->use_count; 05912 05913 if (key1->elements != 1) 05914 { 05915 key2->use_count+=key1->elements-1; 05916 key2->increment_use_count((int) key1->elements-1); 05917 } 05918 if (key1->type == SEL_ARG::MAYBE_KEY) 05919 { 05920 key1->right= key1->left= &null_element; 05921 key1->next= key1->prev= 0; 05922 } 05923 for (next=key1->first(); next ; next=next->next) 05924 { 05925 if (next->next_key_part) 05926 { 05927 SEL_ARG *tmp=key_and(next->next_key_part,key2,clone_flag); 05928 if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE) 05929 { 05930 key1=key1->tree_delete(next); 05931 continue; 05932 } 05933 
next->next_key_part=tmp; 05934 if (use_count) 05935 next->increment_use_count(use_count); 05936 } 05937 else 05938 next->next_key_part=key2; 05939 } 05940 if (!key1) 05941 return &null_element; // Impossible ranges 05942 key1->use_count++; 05943 return key1; 05944 } 05945 05946 05947 /* 05948 Produce a SEL_ARG graph that represents "key1 AND key2" 05949 05950 SYNOPSIS 05951 key_and() 05952 key1 First argument, root of its RB-tree 05953 key2 Second argument, root of its RB-tree 05954 05955 RETURN 05956 RB-tree root of the resulting SEL_ARG graph. 05957 NULL if the result of AND operation is an empty interval {0}. 05958 */ 05959 05960 static SEL_ARG * 05961 key_and(SEL_ARG *key1, SEL_ARG *key2, uint clone_flag) 05962 { 05963 if (!key1) 05964 return key2; 05965 if (!key2) 05966 return key1; 05967 if (key1->part != key2->part) 05968 { 05969 if (key1->part > key2->part) 05970 { 05971 swap_variables(SEL_ARG *, key1, key2); 05972 clone_flag=swap_clone_flag(clone_flag); 05973 } 05974 // key1->part < key2->part 05975 key1->use_count--; 05976 if (key1->use_count > 0) 05977 if (!(key1= key1->clone_tree())) 05978 return 0; // OOM 05979 return and_all_keys(key1,key2,clone_flag); 05980 } 05981 05982 if (((clone_flag & CLONE_KEY2_MAYBE) && 05983 !(clone_flag & CLONE_KEY1_MAYBE) && 05984 key2->type != SEL_ARG::MAYBE_KEY) || 05985 key1->type == SEL_ARG::MAYBE_KEY) 05986 { // Put simple key in key2 05987 swap_variables(SEL_ARG *, key1, key2); 05988 clone_flag=swap_clone_flag(clone_flag); 05989 } 05990 05991 /* If one of the key is MAYBE_KEY then the found region may be smaller */ 05992 if (key2->type == SEL_ARG::MAYBE_KEY) 05993 { 05994 if (key1->use_count > 1) 05995 { 05996 key1->use_count--; 05997 if (!(key1=key1->clone_tree())) 05998 return 0; // OOM 05999 key1->use_count++; 06000 } 06001 if (key1->type == SEL_ARG::MAYBE_KEY) 06002 { // Both are maybe key 06003 key1->next_key_part=key_and(key1->next_key_part,key2->next_key_part, 06004 clone_flag); 06005 if (key1->next_key_part && 
06006 key1->next_key_part->type == SEL_ARG::IMPOSSIBLE) 06007 return key1; 06008 } 06009 else 06010 { 06011 key1->maybe_smaller(); 06012 if (key2->next_key_part) 06013 { 06014 key1->use_count--; // Incremented in and_all_keys 06015 return and_all_keys(key1,key2,clone_flag); 06016 } 06017 key2->use_count--; // Key2 doesn't have a tree 06018 } 06019 return key1; 06020 } 06021 06022 if ((key1->min_flag | key2->min_flag) & GEOM_FLAG) 06023 { 06024 /* TODO: why not leave one of the trees? */ 06025 key1->free_tree(); 06026 key2->free_tree(); 06027 return 0; // Can't optimize this 06028 } 06029 06030 if ((key1->min_flag | key2->min_flag) & GEOM_FLAG) 06031 { 06032 key1->free_tree(); 06033 key2->free_tree(); 06034 return 0; // Can't optimize this 06035 } 06036 06037 key1->use_count--; 06038 key2->use_count--; 06039 SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0; 06040 06041 while (e1 && e2) 06042 { 06043 int cmp=e1->cmp_min_to_min(e2); 06044 if (cmp < 0) 06045 { 06046 if (get_range(&e1,&e2,key1)) 06047 continue; 06048 } 06049 else if (get_range(&e2,&e1,key2)) 06050 continue; 06051 SEL_ARG *next=key_and(e1->next_key_part,e2->next_key_part,clone_flag); 06052 e1->increment_use_count(1); 06053 e2->increment_use_count(1); 06054 if (!next || next->type != SEL_ARG::IMPOSSIBLE) 06055 { 06056 SEL_ARG *new_arg= e1->clone_and(e2); 06057 if (!new_arg) 06058 return &null_element; // End of memory 06059 new_arg->next_key_part=next; 06060 if (!new_tree) 06061 { 06062 new_tree=new_arg; 06063 } 06064 else 06065 new_tree=new_tree->insert(new_arg); 06066 } 06067 if (e1->cmp_max_to_max(e2) < 0) 06068 e1=e1->next; // e1 can't overlapp next e2 06069 else 06070 e2=e2->next; 06071 } 06072 key1->free_tree(); 06073 key2->free_tree(); 06074 if (!new_tree) 06075 return &null_element; // Impossible range 06076 return new_tree; 06077 } 06078 06079 06080 static bool 06081 get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1) 06082 { 06083 (*e1)=root1->find_range(*e2); // first e1->min < e2->min 
06084 if ((*e1)->cmp_max_to_min(*e2) < 0) 06085 { 06086 if (!((*e1)=(*e1)->next)) 06087 return 1; 06088 if ((*e1)->cmp_min_to_max(*e2) > 0) 06089 { 06090 (*e2)=(*e2)->next; 06091 return 1; 06092 } 06093 } 06094 return 0; 06095 } 06096 06097 06098 static SEL_ARG * 06099 key_or(SEL_ARG *key1,SEL_ARG *key2) 06100 { 06101 if (!key1) 06102 { 06103 if (key2) 06104 { 06105 key2->use_count--; 06106 key2->free_tree(); 06107 } 06108 return 0; 06109 } 06110 if (!key2) 06111 { 06112 key1->use_count--; 06113 key1->free_tree(); 06114 return 0; 06115 } 06116 key1->use_count--; 06117 key2->use_count--; 06118 06119 if (key1->part != key2->part || 06120 (key1->min_flag | key2->min_flag) & GEOM_FLAG) 06121 { 06122 key1->free_tree(); 06123 key2->free_tree(); 06124 return 0; // Can't optimize this 06125 } 06126 06127 // If one of the key is MAYBE_KEY then the found region may be bigger 06128 if (key1->type == SEL_ARG::MAYBE_KEY) 06129 { 06130 key2->free_tree(); 06131 key1->use_count++; 06132 return key1; 06133 } 06134 if (key2->type == SEL_ARG::MAYBE_KEY) 06135 { 06136 key1->free_tree(); 06137 key2->use_count++; 06138 return key2; 06139 } 06140 06141 if (key1->use_count > 0) 06142 { 06143 if (key2->use_count == 0 || key1->elements > key2->elements) 06144 { 06145 swap_variables(SEL_ARG *,key1,key2); 06146 } 06147 if (key1->use_count > 0 || !(key1=key1->clone_tree())) 06148 return 0; // OOM 06149 } 06150 06151 // Add tree at key2 to tree at key1 06152 bool key2_shared=key2->use_count != 0; 06153 key1->maybe_flag|=key2->maybe_flag; 06154 06155 for (key2=key2->first(); key2; ) 06156 { 06157 SEL_ARG *tmp=key1->find_range(key2); // Find key1.min <= key2.min 06158 int cmp; 06159 06160 if (!tmp) 06161 { 06162 tmp=key1->first(); // tmp.min > key2.min 06163 cmp= -1; 06164 } 06165 else if ((cmp=tmp->cmp_max_to_min(key2)) < 0) 06166 { // Found tmp.max < key2.min 06167 SEL_ARG *next=tmp->next; 06168 if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part)) 06169 { 06170 // Join near ranges 
like tmp.max < 0 and key2.min >= 0 06171 SEL_ARG *key2_next=key2->next; 06172 if (key2_shared) 06173 { 06174 if (!(key2=new SEL_ARG(*key2))) 06175 return 0; // out of memory 06176 key2->increment_use_count(key1->use_count+1); 06177 key2->next=key2_next; // New copy of key2 06178 } 06179 key2->copy_min(tmp); 06180 if (!(key1=key1->tree_delete(tmp))) 06181 { // Only one key in tree 06182 key1=key2; 06183 key1->make_root(); 06184 key2=key2_next; 06185 break; 06186 } 06187 } 06188 if (!(tmp=next)) // tmp.min > key2.min 06189 break; // Copy rest of key2 06190 } 06191 if (cmp < 0) 06192 { // tmp.min > key2.min 06193 int tmp_cmp; 06194 if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0) // if tmp.min > key2.max 06195 { 06196 if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part)) 06197 { // ranges are connected 06198 tmp->copy_min_to_min(key2); 06199 key1->merge_flags(key2); 06200 if (tmp->min_flag & NO_MIN_RANGE && 06201 tmp->max_flag & NO_MAX_RANGE) 06202 { 06203 if (key1->maybe_flag) 06204 return new SEL_ARG(SEL_ARG::MAYBE_KEY); 06205 return 0; 06206 } 06207 key2->increment_use_count(-1); // Free not used tree 06208 key2=key2->next; 06209 continue; 06210 } 06211 else 06212 { 06213 SEL_ARG *next=key2->next; // Keys are not overlapping 06214 if (key2_shared) 06215 { 06216 SEL_ARG *cpy= new SEL_ARG(*key2); // Must make copy 06217 if (!cpy) 06218 return 0; // OOM 06219 key1=key1->insert(cpy); 06220 key2->increment_use_count(key1->use_count+1); 06221 } 06222 else 06223 key1=key1->insert(key2); // Will destroy key2_root 06224 key2=next; 06225 continue; 06226 } 06227 } 06228 } 06229 06230 // tmp.max >= key2.min && tmp.min <= key.max (overlapping ranges) 06231 if (eq_tree(tmp->next_key_part,key2->next_key_part)) 06232 { 06233 if (tmp->is_same(key2)) 06234 { 06235 tmp->merge_flags(key2); // Copy maybe flags 06236 key2->increment_use_count(-1); // Free not used tree 06237 } 06238 else 06239 { 06240 SEL_ARG *last=tmp; 06241 while (last->next && 
last->next->cmp_min_to_max(key2) <= 0 && 06242 eq_tree(last->next->next_key_part,key2->next_key_part)) 06243 { 06244 SEL_ARG *save=last; 06245 last=last->next; 06246 key1=key1->tree_delete(save); 06247 } 06248 last->copy_min(tmp); 06249 if (last->copy_min(key2) || last->copy_max(key2)) 06250 { // Full range 06251 key1->free_tree(); 06252 for (; key2 ; key2=key2->next) 06253 key2->increment_use_count(-1); // Free not used tree 06254 if (key1->maybe_flag) 06255 return new SEL_ARG(SEL_ARG::MAYBE_KEY); 06256 return 0; 06257 } 06258 } 06259 key2=key2->next; 06260 continue; 06261 } 06262 06263 if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0) 06264 { // tmp.min <= x < key2.min 06265 SEL_ARG *new_arg=tmp->clone_first(key2); 06266 if (!new_arg) 06267 return 0; // OOM 06268 if ((new_arg->next_key_part= key1->next_key_part)) 06269 new_arg->increment_use_count(key1->use_count+1); 06270 tmp->copy_min_to_min(key2); 06271 key1=key1->insert(new_arg); 06272 } 06273 06274 // tmp.min >= key2.min && tmp.min <= key2.max 06275 SEL_ARG key(*key2); // Get copy we can modify 06276 for (;;) 06277 { 06278 if (tmp->cmp_min_to_min(&key) > 0) 06279 { // key.min <= x < tmp.min 06280 SEL_ARG *new_arg=key.clone_first(tmp); 06281 if (!new_arg) 06282 return 0; // OOM 06283 if ((new_arg->next_key_part=key.next_key_part)) 06284 new_arg->increment_use_count(key1->use_count+1); 06285 key1=key1->insert(new_arg); 06286 } 06287 if ((cmp=tmp->cmp_max_to_max(&key)) <= 0) 06288 { // tmp.min. 
<= x <= tmp.max 06289 tmp->maybe_flag|= key.maybe_flag; 06290 key.increment_use_count(key1->use_count+1); 06291 tmp->next_key_part=key_or(tmp->next_key_part,key.next_key_part); 06292 if (!cmp) // Key2 is ready 06293 break; 06294 key.copy_max_to_min(tmp); 06295 if (!(tmp=tmp->next)) 06296 { 06297 SEL_ARG *tmp2= new SEL_ARG(key); 06298 if (!tmp2) 06299 return 0; // OOM 06300 key1=key1->insert(tmp2); 06301 key2=key2->next; 06302 goto end; 06303 } 06304 if (tmp->cmp_min_to_max(&key) > 0) 06305 { 06306 SEL_ARG *tmp2= new SEL_ARG(key); 06307 if (!tmp2) 06308 return 0; // OOM 06309 key1=key1->insert(tmp2); 06310 break; 06311 } 06312 } 06313 else 06314 { 06315 SEL_ARG *new_arg=tmp->clone_last(&key); // tmp.min <= x <= key.max 06316 if (!new_arg) 06317 return 0; // OOM 06318 tmp->copy_max_to_min(&key); 06319 tmp->increment_use_count(key1->use_count+1); 06320 /* Increment key count as it may be used for next loop */ 06321 key.increment_use_count(1); 06322 new_arg->next_key_part=key_or(tmp->next_key_part,key.next_key_part); 06323 key1=key1->insert(new_arg); 06324 break; 06325 } 06326 } 06327 key2=key2->next; 06328 } 06329 06330 end: 06331 while (key2) 06332 { 06333 SEL_ARG *next=key2->next; 06334 if (key2_shared) 06335 { 06336 SEL_ARG *tmp=new SEL_ARG(*key2); // Must make copy 06337 if (!tmp) 06338 return 0; 06339 key2->increment_use_count(key1->use_count+1); 06340 key1=key1->insert(tmp); 06341 } 06342 else 06343 key1=key1->insert(key2); // Will destroy key2_root 06344 key2=next; 06345 } 06346 key1->use_count++; 06347 return key1; 06348 } 06349 06350 06351 /* Compare if two trees are equal */ 06352 06353 static bool eq_tree(SEL_ARG* a,SEL_ARG *b) 06354 { 06355 if (a == b) 06356 return 1; 06357 if (!a || !b || !a->is_same(b)) 06358 return 0; 06359 if (a->left != &null_element && b->left != &null_element) 06360 { 06361 if (!eq_tree(a->left,b->left)) 06362 return 0; 06363 } 06364 else if (a->left != &null_element || b->left != &null_element) 06365 return 0; 06366 if (a->right != 
&null_element && b->right != &null_element)
  {
    if (!eq_tree(a->right,b->right))
      return 0;
  }
  else if (a->right != &null_element || b->right != &null_element)
    return 0;
  if (a->next_key_part != b->next_key_part)
  {                                             // Sub range
    /* Exactly one side having a sub-tree, or differing sub-trees: not equal */
    if (!a->next_key_part != !b->next_key_part ||
        !eq_tree(a->next_key_part, b->next_key_part))
      return 0;
  }
  return 1;
}


/*
  Insert an interval into the RB-tree rooted at this node

  SYNOPSIS
    insert()
      key  Interval to add

  DESCRIPTION
    Descends from the root comparing minimum endpoints (cmp_min_to_min),
    hangs "key" below the last node visited, links it into the prev/next
    list right before or right after that node, and rebalances the tree
    with rb_insert(). use_count and maybe_flag of the old root (this) are
    copied to the new root, and elements is set to the old value plus one.

  RETURN
    Root of the tree after insertion (may differ from this)
*/

SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
  SEL_ARG *element,**par,*last_element;
  LINT_INIT(par);
  LINT_INIT(last_element);

  for (element= this; element != &null_element ; )
  {
    last_element=element;
    if (key->cmp_min_to_min(element) > 0)
    {
      par= &element->right; element= element->right;
    }
    else
    {
      par = &element->left; element= element->left;
    }
  }
  *par=key;
  key->parent=last_element;
  /* Link in list */
  if (par == &last_element->left)
  {
    /* key became a left child: it goes right before its parent */
    key->next=last_element;
    if ((key->prev=last_element->prev))
      key->prev->next=key;
    last_element->prev=key;
  }
  else
  {
    /* key became a right child: it goes right after its parent */
    if ((key->next=last_element->next))
      key->next->prev=key;
    key->prev=last_element;
    last_element->next=key;
  }
  key->left=key->right= &null_element;
  SEL_ARG *root=rb_insert(key);                 // rebalance tree
  root->use_count=this->use_count;              // copy root info
  root->elements= this->elements+1;
  root->maybe_flag=this->maybe_flag;
  return root;
}


/*
** Find best key with min <= given key
**
** Walks down from this node: an element whose min equals the min of "key"
** is returned at once; otherwise the last element seen with a smaller min
** is returned, or 0 if every element has a larger min.
**
** Because of the call context this should never return 0 to get_range
*/

SEL_ARG *
SEL_ARG::find_range(SEL_ARG *key)
{
  SEL_ARG *element=this,*found=0;

  for (;;)
  {
    if (element == &null_element)
      return found;
    int cmp=element->cmp_min_to_min(key);
    if (cmp == 0)
      return element;
    if (cmp < 0)
    {
      found=element;                            // candidate; look for a closer one
      element=element->right;
    }
    else
      element=element->left;
  }
}


/*
  Remove an element from the tree

  SYNOPSIS
    tree_delete()
    key         Key that is to be deleted from tree (this)

  NOTE
    The element is also unlinked from the prev/next list and
    key->increment_use_count(-1) is called for it, which releases the
    sub trees that are used by the element.

  RETURN
    root of new tree (with key deleted)
    0 if the tree became empty
*/

SEL_ARG *
SEL_ARG::tree_delete(SEL_ARG *key)
{
  enum leaf_color remove_color;
  SEL_ARG *root,*nod,**par,*fix_par;
  DBUG_ENTER("tree_delete");

  root=this;
  this->parent= 0;

  /* Unlink from list */
  if (key->prev)
    key->prev->next=key->next;
  if (key->next)
    key->next->prev=key->prev;
  key->increment_use_count(-1);
  /* par is the pointer (in the parent, or the local root) that refers to key */
  if (!key->parent)
    par= &root;
  else
    par=key->parent_ptr();

  if (key->left == &null_element)
  {
    /* At most a right child: it takes the place of key */
    *par=nod=key->right;
    fix_par=key->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= key->color;
  }
  else if (key->right == &null_element)
  {
    /* Only a left child: it takes the place of key */
    *par= nod=key->left;
    nod->parent=fix_par=key->parent;
    remove_color= key->color;
  }
  else
  {
    /* Two children: the list successor is moved into the place of key */
    SEL_ARG *tmp=key->next;                     // next bigger key (exist!)
    nod= *tmp->parent_ptr()= tmp->right;        // unlink tmp from tree
    fix_par=tmp->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= tmp->color;

    tmp->parent=key->parent;                    // Move node in place of key
    (tmp->left=key->left)->parent=tmp;
    if ((tmp->right=key->right) != &null_element)
      tmp->right->parent=tmp;
    tmp->color=key->color;
    *par=tmp;
    if (fix_par == key)                         // key->right == key->next
      fix_par=tmp;                              // new parent of nod
  }

  if (root == &null_element)
    DBUG_RETURN(0);                             // Maybe root later
  /* Only the removal of a black node needs rebalancing */
  if (remove_color == BLACK)
    root=rb_delete_fixup(root,nod,fix_par);
  test_rb_tree(root,root->parent);

  root->use_count=this->use_count;              // Fix root counters
  root->elements=this->elements-1;
  root->maybe_flag=this->maybe_flag;
  DBUG_RETURN(root);
}


/* Functions to fix up the tree after insert and delete */

/*
  Rotate left around "leaf": its right child takes its place and "leaf"
  becomes the left child of that node. *root is updated when "leaf" had
  no parent.
*/

static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
{
  SEL_ARG *y=leaf->right;
  leaf->right=y->left;
  if (y->left != &null_element)
    y->left->parent=leaf;
  if (!(y->parent=leaf->parent))
    *root=y;
  else
    *leaf->parent_ptr()=y;
  y->left=leaf;
  leaf->parent=y;
}

/*
  Rotate right around "leaf": its left child takes its place and "leaf"
  becomes the right child of that node. *root is updated when "leaf" had
  no parent.
*/

static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
{
  SEL_ARG *y=leaf->left;
  leaf->left=y->right;
  if (y->right != &null_element)
    y->right->parent=leaf;
  if (!(y->parent=leaf->parent))
    *root=y;
  else
    *leaf->parent_ptr()=y;
  y->right=leaf;
  leaf->parent=y;
}


/*
  Recolor and rotate after "leaf" has been linked into the tree rooted at
  this node. The new node is colored RED and the loop runs for as long as
  its parent is RED as well.

  RETURN
    Root of the tree after rebalancing (always colored BLACK)
*/

SEL_ARG *
SEL_ARG::rb_insert(SEL_ARG *leaf)
{
  SEL_ARG *y,*par,*par2,*root;
  root= this; root->parent= 0;

  leaf->color=RED;
  while (leaf != root && (par= leaf->parent)->color == RED)
  {                                     // This can't be root or 1 level under
    if (par == (par2= leaf->parent->parent)->left)
    {
y= par2->right;                           // the other child of par2
      if (y->color == RED)
      {
        /* Both children of par2 are red: push the red color one level up */
        par->color=BLACK;
        y->color=BLACK;
        leaf=par2;
        leaf->color=RED;                /* And the loop continues */
      }
      else
      {
        if (leaf == par->right)
        {
          left_rotate(&root,leaf->parent);
          par=leaf;                     /* leaf is now parent to old leaf */
        }
        par->color=BLACK;
        par2->color=RED;
        right_rotate(&root,par2);
        break;
      }
    }
    else
    {
      /* Mirror image of the branch above (par is the right child of par2) */
      y= par2->left;
      if (y->color == RED)
      {
        par->color=BLACK;
        y->color=BLACK;
        leaf=par2;
        leaf->color=RED;                /* And the loop continues */
      }
      else
      {
        if (leaf == par->left)
        {
          right_rotate(&root,par);
          par=leaf;
        }
        par->color=BLACK;
        par2->color=RED;
        left_rotate(&root,par2);
        break;
      }
    }
  }
  root->color=BLACK;
  test_rb_tree(root,root->parent);
  return root;
}


/*
  Restore the red-black coloring after a black node has been unlinked

  SYNOPSIS
    rb_delete_fixup()
      root  Root of the tree
      key   Node that took the place of the unlinked node
            (may be &null_element)
      par   Parent of "key"

  RETURN
    Root of the tree after rebalancing
*/

SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
{
  SEL_ARG *x,*w;
  root->parent=0;

  x= key;
  while (x != root && x->color == SEL_ARG::BLACK)
  {
    if (x == par->left)
    {
      w=par->right;                             // sibling of x
      if (w->color == SEL_ARG::RED)
      {
        w->color=SEL_ARG::BLACK;
        par->color=SEL_ARG::RED;
        left_rotate(&root,par);
        w=par->right;
      }
      if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
      {
        w->color=SEL_ARG::RED;
        x=par;                                  // continue one level up
      }
      else
      {
        if (w->right->color == SEL_ARG::BLACK)
        {
          w->left->color=SEL_ARG::BLACK;
          w->color=SEL_ARG::RED;
          right_rotate(&root,w);
          w=par->right;
        }
        w->color=par->color;
        par->color=SEL_ARG::BLACK;
        w->right->color=SEL_ARG::BLACK;
        left_rotate(&root,par);
        x=root;
        break;
      }
    }
    else
    {
      /* Mirror image of the branch above (x is the right child of par) */
      w=par->left;
      if (w->color == SEL_ARG::RED)
      {
        w->color=SEL_ARG::BLACK;
        par->color=SEL_ARG::RED;
        right_rotate(&root,par);
        w=par->left;
      }
      if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
      {
        w->color=SEL_ARG::RED;
        x=par;
      }
      else
      {
        if (w->left->color == SEL_ARG::BLACK)
        {
          w->right->color=SEL_ARG::BLACK;
          w->color=SEL_ARG::RED;
          left_rotate(&root,w);
          w=par->left;
        }
        w->color=par->color;
        par->color=SEL_ARG::BLACK;
        w->left->color=SEL_ARG::BLACK;
        right_rotate(&root,par);
        x=root;
        break;
      }
    }
    par=x->parent;
  }
  x->color=SEL_ARG::BLACK;
  return root;
}


/*
  Test that the properties for a red-black tree hold

  Checks, for the sub-tree at "element", that parent pointers are
  consistent, that a red node has no red child, that left and right do not
  point at the same real node, and that both sub-trees contain the same
  number of black nodes.

  RETURN
    Number of black nodes counted on the way down from "element", or
    -1 if a violation was found (an error has then been printed)
*/

#ifdef EXTRA_DEBUG
int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
{
  int count_l,count_r;

  if (element == &null_element)
    return 0;                                   // Found end of tree
  if (element->parent != parent)
  {
    sql_print_error("Wrong tree: Parent doesn't point at parent");
    return -1;
  }
  if (element->color == SEL_ARG::RED &&
      (element->left->color == SEL_ARG::RED ||
       element->right->color == SEL_ARG::RED))
  {
    sql_print_error("Wrong tree: Found two red in a row");
    return -1;
  }
  if (element->left == element->right && element->left != &null_element)
  {                                             // Dummy test
    sql_print_error("Wrong tree: Found right == left");
    return -1;
  }
  count_l=test_rb_tree(element->left,element);
  count_r=test_rb_tree(element->right,element);
  if (count_l >= 0 && count_r >= 0)
  {
    if (count_l == count_r)
      return count_l+(element->color == SEL_ARG::BLACK);
    sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
            count_l,count_r);
  }
  return -1;                                    // Error, no more warnings
}


/*
  Count how many times SEL_ARG graph "root" refers
to its part "key" 06750 06751 SYNOPSIS 06752 count_key_part_usage() 06753 root An RB-Root node in a SEL_ARG graph. 06754 key Another RB-Root node in that SEL_ARG graph. 06755 06756 DESCRIPTION 06757 The passed "root" node may refer to "key" node via root->next_key_part, 06758 root->next->n 06759 06760 This function counts how many times the node "key" is referred (via 06761 SEL_ARG::next_key_part) by 06762 - intervals of RB-tree pointed by "root", 06763 - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from 06764 intervals of RB-tree pointed by "root", 06765 - and so on. 06766 06767 Here is an example (horizontal links represent next_key_part pointers, 06768 vertical links - next/prev prev pointers): 06769 06770 +----+ $ 06771 |root|-----------------+ 06772 +----+ $ | 06773 | $ | 06774 | $ | 06775 +----+ +---+ $ | +---+ Here the return value 06776 | |- ... -| |---$-+--+->|key| will be 4. 06777 +----+ +---+ $ | | +---+ 06778 | $ | | 06779 ... $ | | 06780 | $ | | 06781 +----+ +---+ $ | | 06782 | |---| |---------+ | 06783 +----+ +---+ $ | 06784 | | $ | 06785 ... +---+ $ | 06786 | |------------+ 06787 +---+ $ 06788 RETURN 06789 Number of links to "key" from nodes reachable from "root". 
06790 */ 06791 06792 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key) 06793 { 06794 ulong count= 0; 06795 for (root=root->first(); root ; root=root->next) 06796 { 06797 if (root->next_key_part) 06798 { 06799 if (root->next_key_part == key) 06800 count++; 06801 if (root->next_key_part->part < key->part) 06802 count+=count_key_part_usage(root->next_key_part,key); 06803 } 06804 } 06805 return count; 06806 } 06807 06808 06809 /* 06810 Check if SEL_ARG::use_count value is correct 06811 06812 SYNOPSIS 06813 SEL_ARG::test_use_count() 06814 root The root node of the SEL_ARG graph (an RB-tree root node that 06815 has the least value of sel_arg->part in the entire graph, and 06816 thus is the "origin" of the graph) 06817 06818 DESCRIPTION 06819 Check if SEL_ARG::use_count value is correct. See the definition of 06820 use_count for what is "correct". 06821 */ 06822 06823 void SEL_ARG::test_use_count(SEL_ARG *root) 06824 { 06825 uint e_count=0; 06826 if (this == root && use_count != 1) 06827 { 06828 sql_print_information("Use_count: Wrong count %lu for root",use_count); 06829 return; 06830 } 06831 if (this->type != SEL_ARG::KEY_RANGE) 06832 return; 06833 for (SEL_ARG *pos=first(); pos ; pos=pos->next) 06834 { 06835 e_count++; 06836 if (pos->next_key_part) 06837 { 06838 ulong count=count_key_part_usage(root,pos->next_key_part); 06839 if (count > pos->next_key_part->use_count) 06840 { 06841 sql_print_information("Use_count: Wrong count for key at 0x%lx, %lu should be %lu", 06842 pos,pos->next_key_part->use_count,count); 06843 return; 06844 } 06845 pos->next_key_part->test_use_count(root); 06846 } 06847 } 06848 if (e_count != elements) 06849 sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx", 06850 e_count, elements, (gptr) this); 06851 } 06852 06853 #endif 06854 06855 06856 /* 06857 Calculate estimate of number records that will be retrieved by a range 06858 scan on given index using given SEL_ARG intervals tree. 
06859 SYNOPSIS 06860 check_quick_select 06861 param Parameter from test_quick_select 06862 idx Number of index to use in tree->keys 06863 tree Transformed selection condition, tree->keys[idx] 06864 holds the range tree to be used for scanning. 06865 update_tbl_stats If true, update table->quick_keys with information 06866 about range scan we've evaluated. 06867 06868 NOTES 06869 param->is_ror_scan is set to reflect if the key scan is a ROR (see 06870 is_key_scan_ror function for more info) 06871 param->table->quick_*, param->range_count (and maybe others) are 06872 updated with data of given key scan, see check_quick_keys for details. 06873 06874 RETURN 06875 Estimate # of records to be retrieved. 06876 HA_POS_ERROR if estimate calculation failed due to table handler problems. 06877 06878 */ 06879 06880 static ha_rows 06881 check_quick_select(PARAM *param,uint idx,SEL_ARG *tree, bool update_tbl_stats) 06882 { 06883 ha_rows records; 06884 bool cpk_scan; 06885 uint key; 06886 DBUG_ENTER("check_quick_select"); 06887 06888 param->is_ror_scan= FALSE; 06889 06890 if (!tree) 06891 DBUG_RETURN(HA_POS_ERROR); // Can't use it 06892 param->max_key_part=0; 06893 param->range_count=0; 06894 key= param->real_keynr[idx]; 06895 06896 if (tree->type == SEL_ARG::IMPOSSIBLE) 06897 DBUG_RETURN(0L); // Impossible select. return 06898 if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0) 06899 DBUG_RETURN(HA_POS_ERROR); // Don't use tree 06900 06901 enum ha_key_alg key_alg= param->table->key_info[key].algorithm; 06902 if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF)) 06903 { 06904 /* Records are not ordered by rowid for other types of indexes. */ 06905 cpk_scan= FALSE; 06906 } 06907 else 06908 { 06909 /* 06910 Clustered PK scan is a special case, check_quick_keys doesn't recognize 06911 CPK scans as ROR scans (while actually any CPK scan is a ROR scan). 
06912 */ 06913 cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) && 06914 param->table->file->primary_key_is_clustered()); 06915 param->is_ror_scan= !cpk_scan; 06916 } 06917 param->n_ranges= 0; 06918 06919 records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0); 06920 if (records != HA_POS_ERROR) 06921 { 06922 if (update_tbl_stats) 06923 { 06924 param->table->quick_keys.set_bit(key); 06925 param->table->quick_key_parts[key]=param->max_key_part+1; 06926 param->table->quick_n_ranges[key]= param->n_ranges; 06927 param->table->quick_condition_rows= 06928 min(param->table->quick_condition_rows, records); 06929 } 06930 /* 06931 Need to save quick_rows in any case as it is used when calculating 06932 cost of ROR intersection: 06933 */ 06934 param->table->quick_rows[key]=records; 06935 if (cpk_scan) 06936 param->is_ror_scan= TRUE; 06937 } 06938 if (param->table->file->index_flags(key, 0, TRUE) & HA_KEY_SCAN_NOT_ROR) 06939 param->is_ror_scan= FALSE; 06940 DBUG_PRINT("exit", ("Records: %lu", (ulong) records)); 06941 DBUG_RETURN(records); 06942 } 06943 06944 06945 /* 06946 Recursively calculate estimate of # rows that will be retrieved by 06947 key scan on key idx. 06948 SYNOPSIS 06949 check_quick_keys() 06950 param Parameter from test_quick select function. 06951 idx Number of key to use in PARAM::keys in list of used keys 06952 (param->real_keynr[idx] holds the key number in table) 06953 key_tree SEL_ARG tree being examined. 06954 min_key Buffer with partial min key value tuple 06955 min_key_flag 06956 max_key Buffer with partial max key value tuple 06957 max_key_flag 06958 06959 NOTES 06960 The function does the recursive descent on the tree via SEL_ARG::left, 06961 SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates 06962 are calculated using records_in_range calls at the leaf nodes and then 06963 summed. 06964 06965 param->min_key and param->max_key are used to hold prefixes of key value 06966 tuples. 
The side effects are:

    param->max_key_part is updated to hold the maximum number of key parts used
      in scan minus 1.

    param->range_count is incremented if the function finds a range that
      wasn't counted by the caller.

    param->n_ranges is incremented for every range that is estimated.

    param->is_ror_scan is cleared if the function detects that the key scan is
      not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
      function for description of which key scans are ROR scans)

  RETURN
    Sum of the estimates for the sub-tree at key_tree, or HA_POS_ERROR as
    soon as any part of it yields HA_POS_ERROR.
*/

static ha_rows
check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
                 char *min_key,uint min_key_flag, char *max_key,
                 uint max_key_flag)
{
  ha_rows records=0, tmp;
  uint tmp_min_flag, tmp_max_flag, keynr, min_key_length, max_key_length;
  char *tmp_min_key, *tmp_max_key;

  param->max_key_part=max(param->max_key_part,key_tree->part);
  if (key_tree->left != &null_element)
  {
    /*
      There are at least two intervals for current key part, i.e. condition
      was converted to something like
        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
      This is not a ROR scan if the key is not Clustered Primary Key.
    */
    param->is_ror_scan= FALSE;
    records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag,
                             max_key,max_key_flag);
    if (records == HA_POS_ERROR)                        // Impossible
      return records;
  }

  /*
    Append the bounds of this interval to the key prefixes; the tmp_*
    pointers end up just past what was written and give the total lengths.
  */
  tmp_min_key= min_key;
  tmp_max_key= max_key;
  key_tree->store(param->key[idx][key_tree->part].store_length,
                  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
  min_key_length= (uint) (tmp_min_key- param->min_key);
  max_key_length= (uint) (tmp_max_key- param->max_key);

  if (param->is_ror_scan)
  {
    /*
      If the index doesn't cover entire key, mark the scan as non-ROR scan.
      Actually we're cutting off some ROR scans here.
    */
    uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
                    key_part[key_tree->part].fieldnr - 1;
    if (param->table->field[fieldnr]->key_length() !=
        param->key[idx][key_tree->part].length)
      param->is_ror_scan= FALSE;
  }

  if (key_tree->next_key_part &&
      key_tree->next_key_part->part == key_tree->part+1 &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
  {                                             // const key as prefix
    if (min_key_length == max_key_length &&
        !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
        !key_tree->min_flag && !key_tree->max_flag)
    {
      /* Single-point interval: the ranges are given by the next key part */
      tmp=check_quick_keys(param,idx,key_tree->next_key_part,
                           tmp_min_key, min_key_flag | key_tree->min_flag,
                           tmp_max_key, max_key_flag | key_tree->max_flag);
      goto end;                                 // Ugly, but efficient
    }
    else
    {
      /* The interval for current key part is not c1 <= keyXpartY <= c1 */
      param->is_ror_scan= FALSE;
    }

    /*
      Not a single point: extend a bound with the next key part only when
      this interval puts no flag on that bound.
    */
    tmp_min_flag=key_tree->min_flag;
    tmp_max_flag=key_tree->max_flag;
    if (!tmp_min_flag)
      key_tree->next_key_part->store_min_key(param->key[idx], &tmp_min_key,
                                             &tmp_min_flag);
    if (!tmp_max_flag)
      key_tree->next_key_part->store_max_key(param->key[idx], &tmp_max_key,
                                             &tmp_max_flag);
    min_key_length= (uint) (tmp_min_key- param->min_key);
    max_key_length= (uint) (tmp_max_key- param->max_key);
  }
  else
  {
    tmp_min_flag=min_key_flag | key_tree->min_flag;
    tmp_max_flag=max_key_flag | key_tree->max_flag;
  }

  keynr=param->real_keynr[idx];
  param->range_count++;
  /*
    Equality on all parts of a HA_NOSAME key (without HA_END_SPACE_KEY)
    matches at most one row, so the handler is not asked.
  */
  if (!tmp_min_flag && ! tmp_max_flag &&
      (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
      (param->table->key_info[keynr].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
      HA_NOSAME &&
      min_key_length == max_key_length &&
      !memcmp(param->min_key,param->max_key,min_key_length))
  {
    tmp=1;                                      // Max one record
    param->n_ranges++;
  }
  else
  {
    if (param->is_ror_scan)
    {
      /*
        If we get here, the condition on the key was converted to form
        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
          somecond(keyXpart{key_tree->part})"
        Check if
          somecond is "keyXpart{key_tree->part} = const" and
          uncovered "tail" of KeyX parts is either empty or is identical to
          first members of clustered primary key.
      */
      if (!(min_key_length == max_key_length &&
            !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
            !key_tree->min_flag && !key_tree->max_flag &&
            is_key_scan_ror(param, keynr, key_tree->part + 1)))
        param->is_ror_scan= FALSE;
    }
    param->n_ranges++;

    if (tmp_min_flag & GEOM_FLAG)
    {
      key_range min_range;
      min_range.key=    (byte*) param->min_key;
      min_range.length= min_key_length;
      /* In this case tmp_min_flag contains the handler-read-function */
      min_range.flag=   (ha_rkey_function) (tmp_min_flag ^ GEOM_FLAG);

      tmp= param->table->file->records_in_range(keynr, &min_range,
                                                (key_range*) 0);
    }
    else
    {
      key_range min_range, max_range;

      min_range.key=    (byte*) param->min_key;
      min_range.length= min_key_length;
      min_range.flag=   (tmp_min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
                         HA_READ_KEY_EXACT);
      max_range.key=    (byte*) param->max_key;
      max_range.length= max_key_length;
      max_range.flag=   (tmp_max_flag & NEAR_MAX ?
                         HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
      /* A bound of zero length is passed as a NULL range */
      tmp=param->table->file->records_in_range(keynr,
                                               (min_key_length ? &min_range :
                                                (key_range*) 0),
                                               (max_key_length ? &max_range :
                                                (key_range*) 0));
    }
  }
 end:
  if (tmp == HA_POS_ERROR)                      // Impossible range
    return tmp;
  records+=tmp;
  if (key_tree->right != &null_element)
  {
    /*
      There are at least two intervals for current key part, i.e. condition
      was converted to something like
        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
      This is not a ROR scan if the key is not Clustered Primary Key.
    */
    param->is_ror_scan= FALSE;
    tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag,
                         max_key,max_key_flag);
    if (tmp == HA_POS_ERROR)
      return tmp;
    records+=tmp;
  }
  return records;
}


/*
  Check if key scan on given index with equality conditions on first n key
  parts is a ROR scan.

  SYNOPSIS
    is_key_scan_ror()
      param  Parameter from test_quick_select
      keynr  Number of key in the table. The key must not be a clustered
             primary key.
      nparts Number of first key parts for which equality conditions
             are present.

  NOTES
    ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
    ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)

    An index scan is a ROR scan if it is done using a condition in form

        "key1_1=c_1 AND ...
AND key1_n=c_n" (1) 07167 07168 where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n]) 07169 07170 and the table has a clustered Primary Key 07171 07172 PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) with first key parts being 07173 identical to uncovered parts ot the key being scanned (2) 07174 07175 Scans on HASH indexes are not ROR scans, 07176 any range scan on clustered primary key is ROR scan (3) 07177 07178 Check (1) is made in check_quick_keys() 07179 Check (3) is made check_quick_select() 07180 Check (2) is made by this function. 07181 07182 RETURN 07183 TRUE If the scan is ROR-scan 07184 FALSE otherwise 07185 */ 07186 07187 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) 07188 { 07189 KEY *table_key= param->table->key_info + keynr; 07190 KEY_PART_INFO *key_part= table_key->key_part + nparts; 07191 KEY_PART_INFO *key_part_end= (table_key->key_part + 07192 table_key->key_parts); 07193 uint pk_number; 07194 07195 if (key_part == key_part_end) 07196 return TRUE; 07197 pk_number= param->table->s->primary_key; 07198 if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY) 07199 return FALSE; 07200 07201 KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part; 07202 KEY_PART_INFO *pk_part_end= pk_part + 07203 param->table->key_info[pk_number].key_parts; 07204 for (;(key_part!=key_part_end) && (pk_part != pk_part_end); 07205 ++key_part, ++pk_part) 07206 { 07207 if ((key_part->field != pk_part->field) || 07208 (key_part->length != pk_part->length)) 07209 return FALSE; 07210 } 07211 return (key_part == key_part_end); 07212 } 07213 07214 07215 /* 07216 Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key. 07217 07218 SYNOPSIS 07219 get_quick_select() 07220 param 07221 idx Index of used key in param->key. 07222 key_tree SEL_ARG tree for the used key 07223 parent_alloc If not NULL, use it to allocate memory for 07224 quick select data. Otherwise use quick->alloc. 
07225 NOTES 07226 The caller must call QUICK_SELECT::init for returned quick select 07227 07228 CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be 07229 deallocated when the returned quick select is deleted. 07230 07231 RETURN 07232 NULL on error 07233 otherwise created quick select 07234 */ 07235 07236 QUICK_RANGE_SELECT * 07237 get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, 07238 MEM_ROOT *parent_alloc) 07239 { 07240 QUICK_RANGE_SELECT *quick; 07241 DBUG_ENTER("get_quick_select"); 07242 07243 if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL) 07244 quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table, 07245 param->real_keynr[idx], 07246 test(parent_alloc), 07247 parent_alloc); 07248 else 07249 quick=new QUICK_RANGE_SELECT(param->thd, param->table, 07250 param->real_keynr[idx], 07251 test(parent_alloc)); 07252 07253 if (quick) 07254 { 07255 if (quick->error || 07256 get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0, 07257 param->max_key,0)) 07258 { 07259 delete quick; 07260 quick=0; 07261 } 07262 else 07263 { 07264 quick->key_parts=(KEY_PART*) 07265 memdup_root(parent_alloc? 
parent_alloc : &quick->alloc, 07266 (char*) param->key[idx], 07267 sizeof(KEY_PART)* 07268 param->table->key_info[param->real_keynr[idx]].key_parts); 07269 } 07270 } 07271 DBUG_RETURN(quick); 07272 } 07273 07274 07275 /* 07276 ** Fix this to get all possible sub_ranges 07277 */ 07278 bool 07279 get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key, 07280 SEL_ARG *key_tree,char *min_key,uint min_key_flag, 07281 char *max_key, uint max_key_flag) 07282 { 07283 QUICK_RANGE *range; 07284 uint flag; 07285 07286 if (key_tree->left != &null_element) 07287 { 07288 if (get_quick_keys(param,quick,key,key_tree->left, 07289 min_key,min_key_flag, max_key, max_key_flag)) 07290 return 1; 07291 } 07292 char *tmp_min_key=min_key,*tmp_max_key=max_key; 07293 key_tree->store(key[key_tree->part].store_length, 07294 &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag); 07295 07296 if (key_tree->next_key_part && 07297 key_tree->next_key_part->part == key_tree->part+1 && 07298 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) 07299 { // const key as prefix 07300 if (!((tmp_min_key - min_key) != (tmp_max_key - max_key) || 07301 memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) || 07302 key_tree->min_flag || key_tree->max_flag)) 07303 { 07304 if (get_quick_keys(param,quick,key,key_tree->next_key_part, 07305 tmp_min_key, min_key_flag | key_tree->min_flag, 07306 tmp_max_key, max_key_flag | key_tree->max_flag)) 07307 return 1; 07308 goto end; // Ugly, but efficient 07309 } 07310 { 07311 uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag; 07312 if (!tmp_min_flag) 07313 key_tree->next_key_part->store_min_key(key, &tmp_min_key, 07314 &tmp_min_flag); 07315 if (!tmp_max_flag) 07316 key_tree->next_key_part->store_max_key(key, &tmp_max_key, 07317 &tmp_max_flag); 07318 flag=tmp_min_flag | tmp_max_flag; 07319 } 07320 } 07321 else 07322 { 07323 flag = (key_tree->min_flag & GEOM_FLAG) ? 
07324 key_tree->min_flag : key_tree->min_flag | key_tree->max_flag; 07325 } 07326 07327 /* 07328 Ensure that some part of min_key and max_key are used. If not, 07329 regard this as no lower/upper range 07330 */ 07331 if ((flag & GEOM_FLAG) == 0) 07332 { 07333 if (tmp_min_key != param->min_key) 07334 flag&= ~NO_MIN_RANGE; 07335 else 07336 flag|= NO_MIN_RANGE; 07337 if (tmp_max_key != param->max_key) 07338 flag&= ~NO_MAX_RANGE; 07339 else 07340 flag|= NO_MAX_RANGE; 07341 } 07342 if (flag == 0) 07343 { 07344 uint length= (uint) (tmp_min_key - param->min_key); 07345 if (length == (uint) (tmp_max_key - param->max_key) && 07346 !memcmp(param->min_key,param->max_key,length)) 07347 { 07348 KEY *table_key=quick->head->key_info+quick->index; 07349 flag=EQ_RANGE; 07350 if ((table_key->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME && 07351 key->part == table_key->key_parts-1) 07352 { 07353 if (!(table_key->flags & HA_NULL_PART_KEY) || 07354 !null_part_in_key(key, 07355 param->min_key, 07356 (uint) (tmp_min_key - param->min_key))) 07357 flag|= UNIQUE_RANGE; 07358 else 07359 flag|= NULL_RANGE; 07360 } 07361 } 07362 } 07363 07364 /* Get range for retrieving rows in QUICK_SELECT::get_next */ 07365 if (!(range= new QUICK_RANGE((const char *) param->min_key, 07366 (uint) (tmp_min_key - param->min_key), 07367 (const char *) param->max_key, 07368 (uint) (tmp_max_key - param->max_key), 07369 flag))) 07370 return 1; // out of memory 07371 07372 set_if_bigger(quick->max_used_key_length,range->min_length); 07373 set_if_bigger(quick->max_used_key_length,range->max_length); 07374 set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1); 07375 if (insert_dynamic(&quick->ranges, (gptr)&range)) 07376 return 1; 07377 07378 end: 07379 if (key_tree->right != &null_element) 07380 return get_quick_keys(param,quick,key,key_tree->right, 07381 min_key,min_key_flag, 07382 max_key,max_key_flag); 07383 return 0; 07384 } 07385 07386 /* 07387 Return 1 if there is only one range and this uses 
the whole primary key 07388 */ 07389 07390 bool QUICK_RANGE_SELECT::unique_key_range() 07391 { 07392 if (ranges.elements == 1) 07393 { 07394 QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer); 07395 if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE) 07396 { 07397 KEY *key=head->key_info+index; 07398 return ((key->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME && 07399 key->key_length == tmp->min_length); 07400 } 07401 } 07402 return 0; 07403 } 07404 07405 07406 /* Returns TRUE if any part of the key is NULL */ 07407 07408 static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length) 07409 { 07410 for (const char *end=key+length ; 07411 key < end; 07412 key+= key_part++->store_length) 07413 { 07414 if (key_part->null_bit && *key) 07415 return 1; 07416 } 07417 return 0; 07418 } 07419 07420 07421 bool QUICK_SELECT_I::check_if_keys_used(List<Item> *fields) 07422 { 07423 return check_if_key_used(head, index, *fields); 07424 } 07425 07426 bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List<Item> *fields) 07427 { 07428 QUICK_RANGE_SELECT *quick; 07429 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); 07430 while ((quick= it++)) 07431 { 07432 if (check_if_key_used(head, quick->index, *fields)) 07433 return 1; 07434 } 07435 return 0; 07436 } 07437 07438 bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List<Item> *fields) 07439 { 07440 QUICK_RANGE_SELECT *quick; 07441 List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); 07442 while ((quick= it++)) 07443 { 07444 if (check_if_key_used(head, quick->index, *fields)) 07445 return 1; 07446 } 07447 return 0; 07448 } 07449 07450 bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List<Item> *fields) 07451 { 07452 QUICK_SELECT_I *quick; 07453 List_iterator_fast<QUICK_SELECT_I> it(quick_selects); 07454 while ((quick= it++)) 07455 { 07456 if (quick->check_if_keys_used(fields)) 07457 return 1; 07458 } 07459 return 0; 07460 } 07461 07462 07463 /* 07464 Create quick select from ref/ref_or_null 
scan. 07465 07466 SYNOPSIS 07467 get_quick_select_for_ref() 07468 thd Thread handle 07469 table Table to access 07470 ref ref[_or_null] scan parameters 07471 records Estimate of number of records (needed only to construct 07472 quick select) 07473 NOTES 07474 This allocates things in a new memory root, as this may be called many 07475 times during a query. 07476 07477 RETURN 07478 Quick select that retrieves the same rows as passed ref scan 07479 NULL on error. 07480 */ 07481 07482 QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, 07483 TABLE_REF *ref, ha_rows records) 07484 { 07485 MEM_ROOT *old_root, *alloc; 07486 QUICK_RANGE_SELECT *quick; 07487 KEY *key_info = &table->key_info[ref->key]; 07488 KEY_PART *key_part; 07489 QUICK_RANGE *range; 07490 uint part; 07491 07492 old_root= thd->mem_root; 07493 /* The following call may change thd->mem_root */ 07494 quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0); 07495 /* save mem_root set by QUICK_RANGE_SELECT constructor */ 07496 alloc= thd->mem_root; 07497 /* 07498 return back default mem_root (thd->mem_root) changed by 07499 QUICK_RANGE_SELECT constructor 07500 */ 07501 thd->mem_root= old_root; 07502 07503 if (!quick) 07504 return 0; /* no ranges found */ 07505 if (quick->init()) 07506 goto err; 07507 quick->records= records; 07508 07509 if (cp_buffer_from_ref(thd, table, ref) && thd->is_fatal_error || 07510 !(range= new(alloc) QUICK_RANGE())) 07511 goto err; // out of memory 07512 07513 range->min_key=range->max_key=(char*) ref->key_buff; 07514 range->min_length=range->max_length=ref->key_length; 07515 range->flag= ((ref->key_length == key_info->key_length && 07516 (key_info->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == 07517 HA_NOSAME) ? 
EQ_RANGE : 0); 07518 07519 if (!(quick->key_parts=key_part=(KEY_PART *) 07520 alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts))) 07521 goto err; 07522 07523 for (part=0 ; part < ref->key_parts ;part++,key_part++) 07524 { 07525 key_part->part=part; 07526 key_part->field= key_info->key_part[part].field; 07527 key_part->length= key_info->key_part[part].length; 07528 key_part->store_length= key_info->key_part[part].store_length; 07529 key_part->null_bit= key_info->key_part[part].null_bit; 07530 } 07531 if (insert_dynamic(&quick->ranges,(gptr)&range)) 07532 goto err; 07533 07534 /* 07535 Add a NULL range if REF_OR_NULL optimization is used. 07536 For example: 07537 if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above 07538 and have ref->null_ref_key set. Will create a new NULL range here. 07539 */ 07540 if (ref->null_ref_key) 07541 { 07542 QUICK_RANGE *null_range; 07543 07544 *ref->null_ref_key= 1; // Set null byte then create a range 07545 if (!(null_range= new (alloc) QUICK_RANGE((char*)ref->key_buff, 07546 ref->key_length, 07547 (char*)ref->key_buff, 07548 ref->key_length, 07549 EQ_RANGE))) 07550 goto err; 07551 *ref->null_ref_key= 0; // Clear null byte 07552 if (insert_dynamic(&quick->ranges,(gptr)&null_range)) 07553 goto err; 07554 } 07555 07556 return quick; 07557 07558 err: 07559 delete quick; 07560 return 0; 07561 } 07562 07563 07564 /* 07565 Perform key scans for all used indexes (except CPK), get rowids and merge 07566 them into an ordered non-recurrent sequence of rowids. 07567 07568 The merge/duplicate removal is performed using Unique class. We put all 07569 rowids into Unique, get the sorted sequence and destroy the Unique. 
07570 07571 If table has a clustered primary key that covers all rows (TRUE for bdb 07572 and innodb currently) and one of the index_merge scans is a scan on PK, 07573 then rows that will be retrieved by PK scan are not put into Unique and 07574 primary key scan is not performed here, it is performed later separately. 07575 07576 RETURN 07577 0 OK 07578 other error 07579 */ 07580 07581 int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge() 07582 { 07583 List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects); 07584 QUICK_RANGE_SELECT* cur_quick; 07585 int result; 07586 Unique *unique; 07587 MY_BITMAP *save_read_set, *save_write_set; 07588 handler *file= head->file; 07589 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge"); 07590 07591 /* We're going to just read rowids. */ 07592 save_read_set= head->read_set; 07593 save_write_set= head->write_set; 07594 file->extra(HA_EXTRA_KEYREAD); 07595 bitmap_clear_all(&head->tmp_set); 07596 head->column_bitmaps_set(&head->tmp_set, &head->tmp_set); 07597 head->prepare_for_position(); 07598 07599 cur_quick_it.rewind(); 07600 cur_quick= cur_quick_it++; 07601 DBUG_ASSERT(cur_quick != 0); 07602 07603 /* 07604 We reuse the same instance of handler so we need to call both init and 07605 reset here. 
07606 */ 07607 if (cur_quick->init() || cur_quick->reset()) 07608 DBUG_RETURN(1); 07609 07610 unique= new Unique(refpos_order_cmp, (void *)file, 07611 file->ref_length, 07612 thd->variables.sortbuff_size); 07613 if (!unique) 07614 DBUG_RETURN(1); 07615 for (;;) 07616 { 07617 while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE) 07618 { 07619 cur_quick->range_end(); 07620 cur_quick= cur_quick_it++; 07621 if (!cur_quick) 07622 break; 07623 07624 if (cur_quick->file->inited != handler::NONE) 07625 cur_quick->file->ha_index_end(); 07626 if (cur_quick->init() || cur_quick->reset()) 07627 DBUG_RETURN(1); 07628 } 07629 07630 if (result) 07631 { 07632 if (result != HA_ERR_END_OF_FILE) 07633 { 07634 cur_quick->range_end(); 07635 DBUG_RETURN(result); 07636 } 07637 break; 07638 } 07639 07640 if (thd->killed) 07641 DBUG_RETURN(1); 07642 07643 /* skip row if it will be retrieved by clustered PK scan */ 07644 if (pk_quick_select && pk_quick_select->row_in_ranges()) 07645 continue; 07646 07647 cur_quick->file->position(cur_quick->record); 07648 result= unique->unique_add((char*)cur_quick->file->ref); 07649 if (result) 07650 DBUG_RETURN(1); 07651 07652 } 07653 07654 DBUG_PRINT("info", ("ok")); 07655 /* ok, all row ids are in Unique */ 07656 result= unique->get(head); 07657 delete unique; 07658 doing_pk_scan= FALSE; 07659 /* index_merge currently doesn't support "using index" at all */ 07660 file->extra(HA_EXTRA_NO_KEYREAD); 07661 head->column_bitmaps_set(save_read_set, save_write_set); 07662 /* start table scan */ 07663 init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1, 1); 07664 DBUG_RETURN(result); 07665 } 07666 07667 07668 /* 07669 Get next row for index_merge. 07670 NOTES 07671 The rows are read from 07672 1. rowids stored in Unique. 07673 2. QUICK_RANGE_SELECT with clustered primary key (if any). 07674 The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint. 
07675 */ 07676 07677 int QUICK_INDEX_MERGE_SELECT::get_next() 07678 { 07679 int result; 07680 DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next"); 07681 07682 if (doing_pk_scan) 07683 DBUG_RETURN(pk_quick_select->get_next()); 07684 07685 if ((result= read_record.read_record(&read_record)) == -1) 07686 { 07687 result= HA_ERR_END_OF_FILE; 07688 end_read_record(&read_record); 07689 /* All rows from Unique have been retrieved, do a clustered PK scan */ 07690 if (pk_quick_select) 07691 { 07692 doing_pk_scan= TRUE; 07693 if ((result= pk_quick_select->init()) || 07694 (result= pk_quick_select->reset())) 07695 DBUG_RETURN(result); 07696 DBUG_RETURN(pk_quick_select->get_next()); 07697 } 07698 } 07699 07700 DBUG_RETURN(result); 07701 } 07702 07703 07704 /* 07705 Retrieve next record. 07706 SYNOPSIS 07707 QUICK_ROR_INTERSECT_SELECT::get_next() 07708 07709 NOTES 07710 Invariant on enter/exit: all intersected selects have retrieved all index 07711 records with rowid <= some_rowid_val and no intersected select has 07712 retrieved any index records with rowid > some_rowid_val. 07713 We start fresh and loop until we have retrieved the same rowid in each of 07714 the key scans or we got an error. 07715 07716 If a Clustered PK scan is present, it is used only to check if row 07717 satisfies its condition (and never used for row retrieval). 07718 07719 RETURN 07720 0 - Ok 07721 other - Error code if any error occurred. 
07722 */ 07723 07724 int QUICK_ROR_INTERSECT_SELECT::get_next() 07725 { 07726 List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); 07727 QUICK_RANGE_SELECT* quick; 07728 int error, cmp; 07729 uint last_rowid_count=0; 07730 DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next"); 07731 07732 do 07733 { 07734 /* Get a rowid for first quick and save it as a 'candidate' */ 07735 quick= quick_it++; 07736 error= quick->get_next(); 07737 if (cpk_quick) 07738 { 07739 while (!error && !cpk_quick->row_in_ranges()) 07740 error= quick->get_next(); 07741 } 07742 if (error) 07743 DBUG_RETURN(error); 07744 07745 quick->file->position(quick->record); 07746 memcpy(last_rowid, quick->file->ref, head->file->ref_length); 07747 last_rowid_count= 1; 07748 07749 while (last_rowid_count < quick_selects.elements) 07750 { 07751 if (!(quick= quick_it++)) 07752 { 07753 quick_it.rewind(); 07754 quick= quick_it++; 07755 } 07756 07757 do 07758 { 07759 if ((error= quick->get_next())) 07760 DBUG_RETURN(error); 07761 quick->file->position(quick->record); 07762 cmp= head->file->cmp_ref(quick->file->ref, last_rowid); 07763 } while (cmp < 0); 07764 07765 /* Ok, current select 'caught up' and returned ref >= cur_ref */ 07766 if (cmp > 0) 07767 { 07768 /* Found a row with ref > cur_ref. Make it a new 'candidate' */ 07769 if (cpk_quick) 07770 { 07771 while (!cpk_quick->row_in_ranges()) 07772 { 07773 if ((error= quick->get_next())) 07774 DBUG_RETURN(error); 07775 } 07776 } 07777 memcpy(last_rowid, quick->file->ref, head->file->ref_length); 07778 last_rowid_count= 1; 07779 } 07780 else 07781 { 07782 /* current 'candidate' row confirmed by this select */ 07783 last_rowid_count++; 07784 } 07785 } 07786 07787 /* We get here if we got the same row ref in all scans. */ 07788 if (need_to_fetch_row) 07789 error= head->file->rnd_pos(head->record[0], last_rowid); 07790 } while (error == HA_ERR_RECORD_DELETED); 07791 DBUG_RETURN(error); 07792 } 07793 07794 07795 /* 07796 Retrieve next record. 
07797 SYNOPSIS 07798 QUICK_ROR_UNION_SELECT::get_next() 07799 07800 NOTES 07801 Enter/exit invariant: 07802 For each quick select in the queue a {key,rowid} tuple has been 07803 retrieved but the corresponding row hasn't been passed to output. 07804 07805 RETURN 07806 0 - Ok 07807 other - Error code if any error occurred. 07808 */ 07809 07810 int QUICK_ROR_UNION_SELECT::get_next() 07811 { 07812 int error, dup_row; 07813 QUICK_SELECT_I *quick; 07814 byte *tmp; 07815 DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next"); 07816 07817 do 07818 { 07819 do 07820 { 07821 if (!queue.elements) 07822 DBUG_RETURN(HA_ERR_END_OF_FILE); 07823 /* Ok, we have a queue with >= 1 scans */ 07824 07825 quick= (QUICK_SELECT_I*)queue_top(&queue); 07826 memcpy(cur_rowid, quick->last_rowid, rowid_length); 07827 07828 /* put into queue rowid from the same stream as top element */ 07829 if ((error= quick->get_next())) 07830 { 07831 if (error != HA_ERR_END_OF_FILE) 07832 DBUG_RETURN(error); 07833 queue_remove(&queue, 0); 07834 } 07835 else 07836 { 07837 quick->save_last_pos(); 07838 queue_replaced(&queue); 07839 } 07840 07841 if (!have_prev_rowid) 07842 { 07843 /* No rows have been returned yet */ 07844 dup_row= FALSE; 07845 have_prev_rowid= TRUE; 07846 } 07847 else 07848 dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid); 07849 } while (dup_row); 07850 07851 tmp= cur_rowid; 07852 cur_rowid= prev_rowid; 07853 prev_rowid= tmp; 07854 07855 error= head->file->rnd_pos(quick->record, prev_rowid); 07856 } while (error == HA_ERR_RECORD_DELETED); 07857 DBUG_RETURN(error); 07858 } 07859 07860 07861 int QUICK_RANGE_SELECT::reset() 07862 { 07863 uint mrange_bufsiz; 07864 byte *mrange_buff; 07865 DBUG_ENTER("QUICK_RANGE_SELECT::reset"); 07866 next=0; 07867 range= NULL; 07868 in_range= FALSE; 07869 cur_range= (QUICK_RANGE**) ranges.buffer; 07870 07871 if (file->inited == handler::NONE && (error= file->ha_index_init(index,1))) 07872 DBUG_RETURN(error); 07873 07874 /* Do not allocate the buffers twice. 
*/ 07875 if (multi_range_length) 07876 { 07877 DBUG_ASSERT(multi_range_length == min(multi_range_count, ranges.elements)); 07878 DBUG_RETURN(0); 07879 } 07880 07881 /* Allocate the ranges array. */ 07882 DBUG_ASSERT(ranges.elements); 07883 multi_range_length= min(multi_range_count, ranges.elements); 07884 DBUG_ASSERT(multi_range_length > 0); 07885 while (multi_range_length && ! (multi_range= (KEY_MULTI_RANGE*) 07886 my_malloc(multi_range_length * 07887 sizeof(KEY_MULTI_RANGE), 07888 MYF(MY_WME)))) 07889 { 07890 /* Try to shrink the buffers until it is 0. */ 07891 multi_range_length/= 2; 07892 } 07893 if (! multi_range) 07894 { 07895 multi_range_length= 0; 07896 DBUG_RETURN(HA_ERR_OUT_OF_MEM); 07897 } 07898 07899 /* Allocate the handler buffer if necessary. */ 07900 if (file->ha_table_flags() & HA_NEED_READ_RANGE_BUFFER) 07901 { 07902 mrange_bufsiz= min(multi_range_bufsiz, 07903 (QUICK_SELECT_I::records + 1)* head->s->reclength); 07904 07905 while (mrange_bufsiz && 07906 ! my_multi_malloc(MYF(MY_WME), 07907 &multi_range_buff, sizeof(*multi_range_buff), 07908 &mrange_buff, mrange_bufsiz, 07909 NullS)) 07910 { 07911 /* Try to shrink the buffers until both are 0. */ 07912 mrange_bufsiz/= 2; 07913 } 07914 if (! multi_range_buff) 07915 { 07916 my_free((char*) multi_range, MYF(0)); 07917 multi_range= NULL; 07918 multi_range_length= 0; 07919 DBUG_RETURN(HA_ERR_OUT_OF_MEM); 07920 } 07921 07922 /* Initialize the handler buffer. 
*/ 07923 multi_range_buff->buffer= mrange_buff; 07924 multi_range_buff->buffer_end= mrange_buff + mrange_bufsiz; 07925 multi_range_buff->end_of_used_area= mrange_buff; 07926 #ifdef HAVE_purify 07927 /* 07928 We need this until ndb will use the buffer efficiently 07929 (Now ndb stores complete row in here, instead of only the used fields 07930 which gives us valgrind warnings in compare_record[]) 07931 */ 07932 bzero((char*) mrange_buff, mrange_bufsiz); 07933 #endif 07934 } 07935 DBUG_RETURN(0); 07936 } 07937 07938 07939 /* 07940 Get next possible record using quick-struct. 07941 07942 SYNOPSIS 07943 QUICK_RANGE_SELECT::get_next() 07944 07945

