00001 /****************************************************** 00002 Index page routines 00003 00004 (c) 1994-1996 Innobase Oy 00005 00006 Created 2/2/1994 Heikki Tuuri 00007 *******************************************************/ 00008 00009 #define THIS_MODULE 00010 #include "page0page.h" 00011 #ifdef UNIV_NONINL 00012 #include "page0page.ic" 00013 #endif 00014 #undef THIS_MODULE 00015 00016 #include "page0cur.h" 00017 #include "lock0lock.h" 00018 #include "fut0lst.h" 00019 #include "btr0sea.h" 00020 #include "buf0buf.h" 00021 #include "srv0srv.h" 00022 #include "btr0btr.h" 00023 00024 /* THE INDEX PAGE 00025 ============== 00026 00027 The index page consists of a page header which contains the page's 00028 id and other information. On top of it are the the index records 00029 in a heap linked into a one way linear list according to alphabetic order. 00030 00031 Just below page end is an array of pointers which we call page directory, 00032 to about every sixth record in the list. The pointers are placed in 00033 the directory in the alphabetical order of the records pointed to, 00034 enabling us to make binary search using the array. Each slot n:o I 00035 in the directory points to a record, where a 4-bit field contains a count 00036 of those records which are in the linear list between pointer I and 00037 the pointer I - 1 in the directory, including the record 00038 pointed to by pointer I and not including the record pointed to by I - 1. 00039 We say that the record pointed to by slot I, or that slot I, owns 00040 these records. The count is always kept in the range 4 to 8, with 00041 the exception that it is 1 for the first slot, and 1--8 for the second slot. 00042 00043 An essentially binary search can be performed in the list of index 00044 records, like we could do if we had pointer to every record in the 00045 page directory. The data structure is, however, more efficient when 00046 we are doing inserts, because most inserts are just pushed on a heap. 00047 Only every 8th insert requires block move in the directory pointer 00048 table, which itself is quite small. A record is deleted from the page 00049 by just taking it off the linear list and updating the number of owned 00050 records-field of the record which owns it, and updating the page directory, 00051 if necessary. A special case is the one when the record owns itself. 00052 Because the overhead of inserts is so small, we may also increase the 00053 page size from the projected default of 8 kB to 64 kB without too 00054 much loss of efficiency in inserts. Bigger page becomes actual 00055 when the disk transfer rate compared to seek and latency time rises. 00056 On the present system, the page size is set so that the page transfer 00057 time (3 ms) is 20 % of the disk random access time (15 ms). 00058 00059 When the page is split, merged, or becomes full but contains deleted 00060 records, we have to reorganize the page. 00061 00062 Assuming a page size of 8 kB, a typical index page of a secondary 00063 index contains 300 index entries, and the size of the page directory 00064 is 50 x 4 bytes = 200 bytes. */ 00065 00066 /******************************************************************* 00067 Looks for the directory slot which owns the given record. */ 00068 00069 ulint 00070 page_dir_find_owner_slot( 00071 /*=====================*/ 00072 /* out: the directory slot number */ 00073 rec_t* rec) /* in: the physical record */ 00074 { 00075 page_t* page; 00076 register uint16 rec_offs_bytes; 00077 register page_dir_slot_t* slot; 00078 register const page_dir_slot_t* first_slot; 00079 register rec_t* r = rec; 00080 00081 ut_ad(page_rec_check(rec)); 00082 00083 page = buf_frame_align(rec); 00084 first_slot = page_dir_get_nth_slot(page, 0); 00085 slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1); 00086 00087 if (page_is_comp(page)) { 00088 while (rec_get_n_owned(r, TRUE) == 0) { 00089 r = page + rec_get_next_offs(r, TRUE); 00090 ut_ad(r >= page + PAGE_NEW_SUPREMUM); 00091 ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR)); 00092 } 00093 } else { 00094 while (rec_get_n_owned(r, FALSE) == 0) { 00095 r = page + rec_get_next_offs(r, FALSE); 00096 ut_ad(r >= page + PAGE_OLD_SUPREMUM); 00097 ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR)); 00098 } 00099 } 00100 00101 rec_offs_bytes = mach_encode_2(r - page); 00102 00103 while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) { 00104 00105 if (UNIV_UNLIKELY(slot == first_slot)) { 00106 fprintf(stderr, 00107 "InnoDB: Probable data corruption on page %lu\n" 00108 "InnoDB: Original record ", 00109 (ulong) buf_frame_get_page_no(page)); 00110 00111 if (page_is_comp(page)) { 00112 fputs("(compact record)", stderr); 00113 } else { 00114 rec_print_old(stderr, rec); 00115 } 00116 00117 fputs("\n" 00118 "InnoDB: on that page.\n" 00119 "InnoDB: Cannot find the dir slot for record ", 00120 stderr); 00121 if (page_is_comp(page)) { 00122 fputs("(compact record)", stderr); 00123 } else { 00124 rec_print_old(stderr, page 00125 + mach_decode_2(rec_offs_bytes)); 00126 } 00127 fputs("\n" 00128 "InnoDB: on that page!\n", stderr); 00129 00130 buf_page_print(page); 00131 00132 ut_error; 00133 } 00134 00135 slot += PAGE_DIR_SLOT_SIZE; 00136 } 00137 00138 return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE); 00139 } 00140 00141 /****************************************************************** 00142 Used to check the consistency of a directory slot. */ 00143 static 00144 ibool 00145 page_dir_slot_check( 00146 /*================*/ 00147 /* out: TRUE if succeed */ 00148 page_dir_slot_t* slot) /* in: slot */ 00149 { 00150 page_t* page; 00151 ulint n_slots; 00152 ulint n_owned; 00153 00154 ut_a(slot); 00155 00156 page = buf_frame_align(slot); 00157 00158 n_slots = page_dir_get_n_slots(page); 00159 00160 ut_a(slot <= page_dir_get_nth_slot(page, 0)); 00161 ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1)); 00162 00163 ut_a(page_rec_check(page_dir_slot_get_rec(slot))); 00164 00165 n_owned = rec_get_n_owned(page_dir_slot_get_rec(slot), 00166 page_is_comp(page)); 00167 00168 if (slot == page_dir_get_nth_slot(page, 0)) { 00169 ut_a(n_owned == 1); 00170 } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) { 00171 ut_a(n_owned >= 1); 00172 ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED); 00173 } else { 00174 ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED); 00175 ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED); 00176 } 00177 00178 return(TRUE); 00179 } 00180 00181 /***************************************************************** 00182 Sets the max trx id field value. */ 00183 00184 void 00185 page_set_max_trx_id( 00186 /*================*/ 00187 page_t* page, /* in: page */ 00188 dulint trx_id) /* in: transaction id */ 00189 { 00190 buf_block_t* block; 00191 00192 ut_ad(page); 00193 00194 block = buf_block_align(page); 00195 00196 if (block->is_hashed) { 00197 rw_lock_x_lock(&btr_search_latch); 00198 } 00199 00200 /* It is not necessary to write this change to the redo log, as 00201 during a database recovery we assume that the max trx id of every 00202 page is the maximum trx id assigned before the crash. */ 00203 00204 mach_write_to_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID, trx_id); 00205 00206 if (block->is_hashed) { 00207 rw_lock_x_unlock(&btr_search_latch); 00208 } 00209 } 00210 00211 /**************************************************************** 00212 Allocates a block of memory from an index page. */ 00213 00214 byte* 00215 page_mem_alloc( 00216 /*===========*/ 00217 /* out: pointer to start of allocated 00218 buffer, or NULL if allocation fails */ 00219 page_t* page, /* in: index page */ 00220 ulint need, /* in: number of bytes needed */ 00221 dict_index_t* index, /* in: record descriptor */ 00222 ulint* heap_no)/* out: this contains the heap number 00223 of the allocated record 00224 if allocation succeeds */ 00225 { 00226 rec_t* rec; 00227 byte* block; 00228 ulint avl_space; 00229 ulint garbage; 00230 00231 ut_ad(page && heap_no); 00232 00233 /* If there are records in the free list, look if the first is 00234 big enough */ 00235 00236 rec = page_header_get_ptr(page, PAGE_FREE); 00237 00238 if (rec) { 00239 mem_heap_t* heap = NULL; 00240 ulint offsets_[REC_OFFS_NORMAL_SIZE]; 00241 ulint* offsets = offsets_; 00242 *offsets_ = (sizeof offsets_) / sizeof *offsets_; 00243 00244 offsets = rec_get_offsets(rec, index, offsets, 00245 ULINT_UNDEFINED, &heap); 00246 00247 if (rec_offs_size(offsets) >= need) { 00248 page_header_set_ptr(page, PAGE_FREE, 00249 page_rec_get_next(rec)); 00250 00251 garbage = page_header_get_field(page, PAGE_GARBAGE); 00252 ut_ad(garbage >= need); 00253 00254 page_header_set_field(page, PAGE_GARBAGE, 00255 garbage - need); 00256 00257 *heap_no = rec_get_heap_no(rec, page_is_comp(page)); 00258 00259 block = rec_get_start(rec, offsets); 00260 if (UNIV_LIKELY_NULL(heap)) { 00261 mem_heap_free(heap); 00262 } 00263 return(block); 00264 } 00265 00266 if (UNIV_LIKELY_NULL(heap)) { 00267 mem_heap_free(heap); 00268 } 00269 } 00270 00271 /* Could not find space from the free list, try top of heap */ 00272 00273 avl_space = page_get_max_insert_size(page, 1); 00274 00275 if (avl_space >= need) { 00276 block = page_header_get_ptr(page, PAGE_HEAP_TOP); 00277 00278 page_header_set_ptr(page, PAGE_HEAP_TOP, block + need); 00279 *heap_no = page_dir_get_n_heap(page); 00280 00281 page_dir_set_n_heap(page, 1 + *heap_no); 00282 00283 return(block); 00284 } 00285 00286 return(NULL); 00287 } 00288 00289 /************************************************************** 00290 Writes a log record of page creation. */ 00291 UNIV_INLINE 00292 void 00293 page_create_write_log( 00294 /*==================*/ 00295 buf_frame_t* frame, /* in: a buffer frame where the page is 00296 created */ 00297 mtr_t* mtr, /* in: mini-transaction handle */ 00298 ulint comp) /* in: nonzero=compact page format */ 00299 { 00300 mlog_write_initial_log_record(frame, 00301 comp ? MLOG_COMP_PAGE_CREATE : MLOG_PAGE_CREATE, mtr); 00302 } 00303 00304 /*************************************************************** 00305 Parses a redo log record of creating a page. */ 00306 00307 byte* 00308 page_parse_create( 00309 /*==============*/ 00310 /* out: end of log record or NULL */ 00311 byte* ptr, /* in: buffer */ 00312 byte* end_ptr __attribute__((unused)), /* in: buffer end */ 00313 ulint comp, /* in: nonzero=compact page format */ 00314 page_t* page, /* in: page or NULL */ 00315 mtr_t* mtr) /* in: mtr or NULL */ 00316 { 00317 ut_ad(ptr && end_ptr); 00318 00319 /* The record is empty, except for the record initial part */ 00320 00321 if (page) { 00322 page_create(page, mtr, comp); 00323 } 00324 00325 return(ptr); 00326 } 00327 00328 /************************************************************** 00329 The index page creation function. */ 00330 00331 page_t* 00332 page_create( 00333 /*========*/ 00334 /* out: pointer to the page */ 00335 buf_frame_t* frame, /* in: a buffer frame where the page is 00336 created */ 00337 mtr_t* mtr, /* in: mini-transaction handle */ 00338 ulint comp) /* in: nonzero=compact page format */ 00339 { 00340 page_dir_slot_t* slot; 00341 mem_heap_t* heap; 00342 dtuple_t* tuple; 00343 dfield_t* field; 00344 byte* heap_top; 00345 rec_t* infimum_rec; 00346 rec_t* supremum_rec; 00347 page_t* page; 00348 dict_index_t* index; 00349 ulint* offsets; 00350 00351 index = comp ? srv_sys->dummy_ind2 : srv_sys->dummy_ind1; 00352 00353 ut_ad(frame && mtr); 00354 #if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA 00355 # error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA" 00356 #endif 00357 #if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA 00358 # error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA" 00359 #endif 00360 00361 /* 1. INCREMENT MODIFY CLOCK */ 00362 buf_frame_modify_clock_inc(frame); 00363 00364 /* 2. WRITE LOG INFORMATION */ 00365 page_create_write_log(frame, mtr, comp); 00366 00367 page = frame; 00368 00369 fil_page_set_type(page, FIL_PAGE_INDEX); 00370 00371 heap = mem_heap_create(200); 00372 00373 /* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */ 00374 00375 /* Create first a data tuple for infimum record */ 00376 tuple = dtuple_create(heap, 1); 00377 dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM); 00378 field = dtuple_get_nth_field(tuple, 0); 00379 00380 dfield_set_data(field, "infimum", 8); 00381 dtype_set(dfield_get_type(field), 00382 DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8, 0); 00383 /* Set the corresponding physical record to its place in the page 00384 record heap */ 00385 00386 heap_top = page + PAGE_DATA; 00387 00388 infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple); 00389 00390 ut_a(infimum_rec == 00391 page + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); 00392 00393 rec_set_n_owned(infimum_rec, comp, 1); 00394 rec_set_heap_no(infimum_rec, comp, 0); 00395 offsets = rec_get_offsets(infimum_rec, index, NULL, 00396 ULINT_UNDEFINED, &heap); 00397 00398 heap_top = rec_get_end(infimum_rec, offsets); 00399 00400 /* Create then a tuple for supremum */ 00401 00402 tuple = dtuple_create(heap, 1); 00403 dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM); 00404 field = dtuple_get_nth_field(tuple, 0); 00405 00406 dfield_set_data(field, "supremum", comp ? 8 : 9); 00407 dtype_set(dfield_get_type(field), 00408 DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9, 0); 00409 00410 supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple); 00411 00412 ut_a(supremum_rec == 00413 page + (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM)); 00414 00415 rec_set_n_owned(supremum_rec, comp, 1); 00416 rec_set_heap_no(supremum_rec, comp, 1); 00417 00418 offsets = rec_get_offsets(supremum_rec, index, offsets, 00419 ULINT_UNDEFINED, &heap); 00420 heap_top = rec_get_end(supremum_rec, offsets); 00421 00422 ut_ad(heap_top == 00423 page + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)); 00424 00425 mem_heap_free(heap); 00426 00427 /* 4. INITIALIZE THE PAGE */ 00428 00429 page_header_set_field(page, PAGE_N_DIR_SLOTS, 2); 00430 page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top); 00431 page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2); 00432 page_header_set_ptr(page, PAGE_FREE, NULL); 00433 page_header_set_field(page, PAGE_GARBAGE, 0); 00434 page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); 00435 page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); 00436 page_header_set_field(page, PAGE_N_DIRECTION, 0); 00437 page_header_set_field(page, PAGE_N_RECS, 0); 00438 page_set_max_trx_id(page, ut_dulint_zero); 00439 memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START 00440 - (heap_top - page)); 00441 00442 /* 5. SET POINTERS IN RECORDS AND DIR SLOTS */ 00443 00444 /* Set the slots to point to infimum and supremum. */ 00445 00446 slot = page_dir_get_nth_slot(page, 0); 00447 page_dir_slot_set_rec(slot, infimum_rec); 00448 00449 slot = page_dir_get_nth_slot(page, 1); 00450 page_dir_slot_set_rec(slot, supremum_rec); 00451 00452 /* Set the next pointers in infimum and supremum */ 00453 00454 rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page)); 00455 rec_set_next_offs(supremum_rec, comp, 0); 00456 00457 return(page); 00458 } 00459 00460 /***************************************************************** 00461 Differs from page_copy_rec_list_end, because this function does not 00462 touch the lock table and max trx id on page. */ 00463 00464 void 00465 page_copy_rec_list_end_no_locks( 00466 /*============================*/ 00467 page_t* new_page, /* in: index page to copy to */ 00468 page_t* page, /* in: index page */ 00469 rec_t* rec, /* in: record on page */ 00470 dict_index_t* index, /* in: record descriptor */ 00471 mtr_t* mtr) /* in: mtr */ 00472 { 00473 page_cur_t cur1; 00474 page_cur_t cur2; 00475 rec_t* sup; 00476 mem_heap_t* heap = NULL; 00477 ulint offsets_[REC_OFFS_NORMAL_SIZE]; 00478 ulint* offsets = offsets_; 00479 *offsets_ = (sizeof offsets_) / sizeof *offsets_; 00480 00481 page_cur_position(rec, &cur1); 00482 00483 if (page_cur_is_before_first(&cur1)) { 00484 00485 page_cur_move_to_next(&cur1); 00486 } 00487 00488 ut_a((ibool)!!page_is_comp(new_page) 00489 == dict_table_is_comp(index->table)); 00490 ut_a(page_is_comp(new_page) == page_is_comp(page)); 00491 ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint) 00492 (page_is_comp(new_page) 00493 ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); 00494 00495 page_cur_set_before_first(new_page, &cur2); 00496 00497 /* Copy records from the original page to the new page */ 00498 00499 sup = page_get_supremum_rec(page); 00500 00501 for (;;) { 00502 rec_t* cur1_rec = page_cur_get_rec(&cur1); 00503 if (cur1_rec == sup) { 00504 break; 00505 } 00506 offsets = rec_get_offsets(cur1_rec, index, offsets, 00507 ULINT_UNDEFINED, &heap); 00508 if (UNIV_UNLIKELY(!page_cur_rec_insert(&cur2, cur1_rec, index, 00509 offsets, mtr))) { 00510 /* Track an assertion failure reported on the mailing 00511 list on June 18th, 2003 */ 00512 00513 buf_page_print(new_page); 00514 buf_page_print(page); 00515 ut_print_timestamp(stderr); 00516 00517 fprintf(stderr, 00518 "InnoDB: rec offset %lu, cur1 offset %lu, cur2 offset %lu\n", 00519 (ulong)(rec - page), 00520 (ulong)(page_cur_get_rec(&cur1) - page), 00521 (ulong)(page_cur_get_rec(&cur2) - new_page)); 00522 00523 ut_error; 00524 } 00525 00526 page_cur_move_to_next(&cur1); 00527 page_cur_move_to_next(&cur2); 00528 } 00529 00530 if (UNIV_LIKELY_NULL(heap)) { 00531 mem_heap_free(heap); 00532 } 00533 } 00534 00535 /***************************************************************** 00536 Copies records from page to new_page, from a given record onward, 00537 including that record. Infimum and supremum records are not copied. 00538 The records are copied to the start of the record list on new_page. */ 00539 00540 void 00541 page_copy_rec_list_end( 00542 /*===================*/ 00543 page_t* new_page, /* in: index page to copy to */ 00544 page_t* page, /* in: index page */ 00545 rec_t* rec, /* in: record on page */ 00546 dict_index_t* index, /* in: record descriptor */ 00547 mtr_t* mtr) /* in: mtr */ 00548 { 00549 if (page_dir_get_n_heap(new_page) == 2) { 00550 page_copy_rec_list_end_to_created_page(new_page, page, rec, 00551 index, mtr); 00552 } else { 00553 page_copy_rec_list_end_no_locks(new_page, page, rec, 00554 index, mtr); 00555 } 00556 00557 /* Update the lock table, MAX_TRX_ID, and possible hash index */ 00558 00559 lock_move_rec_list_end(new_page, page, rec); 00560 00561 page_update_max_trx_id(new_page, page_get_max_trx_id(page)); 00562 00563 btr_search_move_or_delete_hash_entries(new_page, page, index); 00564 } 00565 00566 /***************************************************************** 00567 Copies records from page to new_page, up to the given record, 00568 NOT including that record. Infimum and supremum records are not copied. 00569 The records are copied to the end of the record list on new_page. */ 00570 00571 void 00572 page_copy_rec_list_start( 00573 /*=====================*/ 00574 page_t* new_page, /* in: index page to copy to */ 00575 page_t* page, /* in: index page */ 00576 rec_t* rec, /* in: record on page */ 00577 dict_index_t* index, /* in: record descriptor */ 00578 mtr_t* mtr) /* in: mtr */ 00579 { 00580 page_cur_t cur1; 00581 page_cur_t cur2; 00582 rec_t* old_end; 00583 mem_heap_t* heap = NULL; 00584 ulint offsets_[REC_OFFS_NORMAL_SIZE]; 00585 ulint* offsets = offsets_; 00586 *offsets_ = (sizeof offsets_) / sizeof *offsets_; 00587 00588 page_cur_set_before_first(page, &cur1); 00589 00590 if (rec == page_cur_get_rec(&cur1)) { 00591 00592 return; 00593 } 00594 00595 page_cur_move_to_next(&cur1); 00596 00597 page_cur_set_after_last(new_page, &cur2); 00598 page_cur_move_to_prev(&cur2); 00599 old_end = page_cur_get_rec(&cur2); 00600 00601 /* Copy records from the original page to the new page */ 00602 00603 while (page_cur_get_rec(&cur1) != rec) { 00604 rec_t* ins_rec; 00605 rec_t* cur1_rec = page_cur_get_rec(&cur1); 00606 offsets = rec_get_offsets(cur1_rec, index, offsets, 00607 ULINT_UNDEFINED, &heap); 00608 ins_rec = page_cur_rec_insert(&cur2, cur1_rec, index, 00609 offsets, mtr); 00610 ut_a(ins_rec); 00611 00612 page_cur_move_to_next(&cur1); 00613 page_cur_move_to_next(&cur2); 00614 } 00615 00616 /* Update the lock table, MAX_TRX_ID, and possible hash index */ 00617 00618 lock_move_rec_list_start(new_page, page, rec, old_end); 00619 00620 page_update_max_trx_id(new_page, page_get_max_trx_id(page)); 00621 00622 btr_search_move_or_delete_hash_entries(new_page, page, index); 00623 00624 if (UNIV_LIKELY_NULL(heap)) { 00625 mem_heap_free(heap); 00626 } 00627 } 00628 00629 /************************************************************** 00630 Writes a log record of a record list end or start deletion. */ 00631 UNIV_INLINE 00632 void 00633 page_delete_rec_list_write_log( 00634 /*===========================*/ 00635 rec_t* rec, /* in: record on page */ 00636 dict_index_t* index, /* in: record descriptor */ 00637 byte type, /* in: operation type: 00638 MLOG_LIST_END_DELETE, ... */ 00639 mtr_t* mtr) /* in: mtr */ 00640 { 00641 byte* log_ptr; 00642 ut_ad(type == MLOG_LIST_END_DELETE 00643 || type == MLOG_LIST_START_DELETE 00644 || type == MLOG_COMP_LIST_END_DELETE 00645 || type == MLOG_COMP_LIST_START_DELETE); 00646 00647 log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2); 00648 if (log_ptr) { 00649 /* Write the parameter as a 2-byte ulint */ 00650 mach_write_to_2(log_ptr, ut_align_offset(rec, UNIV_PAGE_SIZE)); 00651 mlog_close(mtr, log_ptr + 2); 00652 } 00653 } 00654 00655 /************************************************************** 00656 Parses a log record of a record list end or start deletion. */ 00657 00658 byte* 00659 page_parse_delete_rec_list( 00660 /*=======================*/ 00661 /* out: end of log record or NULL */ 00662 byte type, /* in: MLOG_LIST_END_DELETE, 00663 MLOG_LIST_START_DELETE, 00664 MLOG_COMP_LIST_END_DELETE or 00665 MLOG_COMP_LIST_START_DELETE */ 00666 byte* ptr, /* in: buffer */ 00667 byte* end_ptr,/* in: buffer end */ 00668 dict_index_t* index, /* in: record descriptor */ 00669 page_t* page, /* in: page or NULL */ 00670 mtr_t* mtr) /* in: mtr or NULL */ 00671 { 00672 ulint offset; 00673 00674 ut_ad(type == MLOG_LIST_END_DELETE 00675 || type == MLOG_LIST_START_DELETE 00676 || type == MLOG_COMP_LIST_END_DELETE 00677 || type == MLOG_COMP_LIST_START_DELETE); 00678 00679 /* Read the record offset as a 2-byte ulint */ 00680 00681 if (end_ptr < ptr + 2) { 00682 00683 return(NULL); 00684 } 00685 00686 offset = mach_read_from_2(ptr); 00687 ptr += 2; 00688 00689 if (!page) { 00690 00691 return(ptr); 00692 } 00693 00694 ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); 00695 00696 if (type == MLOG_LIST_END_DELETE 00697 || type == MLOG_COMP_LIST_END_DELETE) { 00698 page_delete_rec_list_end(page, page + offset, index, 00699 ULINT_UNDEFINED, ULINT_UNDEFINED, mtr); 00700 } else { 00701 page_delete_rec_list_start(page, page + offset, index, mtr); 00702 } 00703 00704 return(ptr); 00705 } 00706 00707 /***************************************************************** 00708 Deletes records from a page from a given record onward, including that record. 00709 The infimum and supremum records are not deleted. */ 00710 00711 void 00712 page_delete_rec_list_end( 00713 /*=====================*/ 00714 page_t* page, /* in: index page */ 00715 rec_t* rec, /* in: record on page */ 00716 dict_index_t* index, /* in: record descriptor */ 00717 ulint n_recs, /* in: number of records to delete, 00718 or ULINT_UNDEFINED if not known */ 00719 ulint size, /* in: the sum of the sizes of the 00720 records in the end of the chain to 00721 delete, or ULINT_UNDEFINED if not known */ 00722 mtr_t* mtr) /* in: mtr */ 00723 { 00724 page_dir_slot_t* slot; 00725 ulint slot_index; 00726 rec_t* last_rec; 00727 rec_t* prev_rec; 00728 rec_t* free; 00729 rec_t* rec2; 00730 ulint count; 00731 ulint n_owned; 00732 rec_t* sup; 00733 ulint comp; 00734 00735 /* Reset the last insert info in the page header and increment 00736 the modify clock for the frame */ 00737 00738 ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE); 00739 page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); 00740 00741 /* The page gets invalid for optimistic searches: increment the 00742 frame modify clock */ 00743 00744 buf_frame_modify_clock_inc(page); 00745 00746 sup = page_get_supremum_rec(page); 00747 00748 comp = page_is_comp(page); 00749 if (page_rec_is_infimum_low(rec - page)) { 00750 rec = page_rec_get_next(rec); 00751 } 00752 00753 page_delete_rec_list_write_log(rec, index, 00754 comp ? MLOG_COMP_LIST_END_DELETE : MLOG_LIST_END_DELETE, mtr); 00755 00756 if (rec == sup) { 00757 00758 return; 00759 } 00760 00761 prev_rec = page_rec_get_prev(rec); 00762 00763 last_rec = page_rec_get_prev(sup); 00764 00765 if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) { 00766 mem_heap_t* heap = NULL; 00767 ulint offsets_[REC_OFFS_NORMAL_SIZE]; 00768 ulint* offsets = offsets_; 00769 *offsets_ = (sizeof offsets_) / sizeof *offsets_; 00770 /* Calculate the sum of sizes and the number of records */ 00771 size = 0; 00772 n_recs = 0; 00773 rec2 = rec; 00774 00775 while (rec2 != sup) { 00776 ulint s; 00777 offsets = rec_get_offsets(rec2, index, offsets, 00778 ULINT_UNDEFINED, &heap); 00779 s = rec_offs_size(offsets); 00780 ut_ad(rec2 - page + s - rec_offs_extra_size(offsets) 00781 < UNIV_PAGE_SIZE); 00782 ut_ad(size + s < UNIV_PAGE_SIZE); 00783 size += s; 00784 n_recs++; 00785 00786 rec2 = page_rec_get_next(rec2); 00787 } 00788 00789 if (UNIV_LIKELY_NULL(heap)) { 00790 mem_heap_free(heap); 00791 } 00792 } 00793 00794 ut_ad(size < UNIV_PAGE_SIZE); 00795 00796 /* Update the page directory; there is no need to balance the number 00797 of the records owned by the supremum record, as it is allowed to be 00798 less than PAGE_DIR_SLOT_MIN_N_OWNED */ 00799 00800 rec2 = rec; 00801 count = 0; 00802 00803 while (rec_get_n_owned(rec2, comp) == 0) { 00804 count++; 00805 00806 rec2 = page_rec_get_next(rec2); 00807 } 00808 00809 ut_ad(rec_get_n_owned(rec2, comp) - count > 0); 00810 00811 n_owned = rec_get_n_owned(rec2, comp) - count; 00812 00813 slot_index = page_dir_find_owner_slot(rec2); 00814 slot = page_dir_get_nth_slot(page, slot_index); 00815 00816 page_dir_slot_set_rec(slot, sup); 00817 page_dir_slot_set_n_owned(slot, n_owned); 00818 00819 page_dir_set_n_slots(page, slot_index + 1); 00820 00821 /* Remove the record chain segment from the record chain */ 00822 page_rec_set_next(prev_rec, page_get_supremum_rec(page)); 00823 00824 /* Catenate the deleted chain segment to the page free list */ 00825 00826 free = page_header_get_ptr(page, PAGE_FREE); 00827 00828 page_rec_set_next(last_rec, free); 00829 page_header_set_ptr(page, PAGE_FREE, rec); 00830 00831 page_header_set_field(page, PAGE_GARBAGE, 00832 size + page_header_get_field(page, PAGE_GARBAGE)); 00833 00834 page_header_set_field(page, PAGE_N_RECS, 00835 (ulint)(page_get_n_recs(page) - n_recs)); 00836 } 00837 00838 /***************************************************************** 00839 Deletes records from page, up to the given record, NOT including 00840 that record. Infimum and supremum records are not deleted. */ 00841 00842 void 00843 page_delete_rec_list_start( 00844 /*=======================*/ 00845 page_t* page, /* in: index page */ 00846 rec_t* rec, /* in: record on page */ 00847 dict_index_t* index, /* in: record descriptor */ 00848 mtr_t* mtr) /* in: mtr */ 00849 { 00850 page_cur_t cur1; 00851 ulint log_mode; 00852 ulint offsets_[REC_OFFS_NORMAL_SIZE]; 00853 ulint* offsets = offsets_; 00854 mem_heap_t* heap = NULL; 00855 byte type; 00856 *offsets_ = (sizeof offsets_) / sizeof *offsets_; 00857 00858 ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); 00859 00860 if (page_is_comp(page)) { 00861 type = MLOG_COMP_LIST_START_DELETE; 00862 } else { 00863 type = MLOG_LIST_START_DELETE; 00864 } 00865 00866 page_delete_rec_list_write_log(rec, index, type, mtr); 00867 00868 page_cur_set_before_first(page, &cur1); 00869 00870 if (rec == page_cur_get_rec(&cur1)) { 00871 00872 return; 00873 } 00874 00875 page_cur_move_to_next(&cur1); 00876 00877 /* Individual deletes are not logged */ 00878 00879 log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); 00880 00881 while (page_cur_get_rec(&cur1) != rec) { 00882 offsets = rec_get_offsets(page_cur_get_rec(&cur1), index, 00883 offsets, ULINT_UNDEFINED, &heap); 00884 page_cur_delete_rec(&cur1, index, offsets, mtr); 00885 } 00886 00887 if (UNIV_LIKELY_NULL(heap)) { 00888 mem_heap_free(heap); 00889 } 00890 00891 /* Restore log mode */ 00892 00893 mtr_set_log_mode(mtr, log_mode); 00894 } 00895 00896 /***************************************************************** 00897 Moves record list end to another page. Moved records include 00898 split_rec. */ 00899 00900 void 00901 page_move_rec_list_end( 00902 /*===================*/ 00903 page_t* new_page, /* in: index page where to move */ 00904 page_t* page, /* in: index page */ 00905 rec_t* split_rec, /* in: first record to move */ 00906 dict_index_t* index, /* in: record descriptor */ 00907 mtr_t* mtr) /* in: mtr */ 00908 { 00909 ulint old_data_size; 00910 ulint new_data_size; 00911 ulint old_n_recs; 00912 ulint new_n_recs; 00913 00914 old_data_size = page_get_data_size(new_page); 00915 old_n_recs = page_get_n_recs(new_page); 00916 00917 page_copy_rec_list_end(new_page, page, split_rec, index, mtr); 00918 00919 new_data_size = page_get_data_size(new_page); 00920 new_n_recs = page_get_n_recs(new_page); 00921 00922 ut_ad(new_data_size >= old_data_size); 00923 00924 page_delete_rec_list_end(page, split_rec, index, 00925 new_n_recs - old_n_recs, new_data_size - old_data_size, mtr); 00926 } 00927 00928 /***************************************************************** 00929 Moves record list start to another page. Moved records do not include 00930 split_rec. */ 00931 00932 void 00933 page_move_rec_list_start( 00934 /*=====================*/ 00935 page_t* new_page, /* in: index page where to move */ 00936 page_t* page, /* in: index page */ 00937 rec_t* split_rec, /* in: first record not to move */ 00938 dict_index_t* index, /* in: record descriptor */ 00939 mtr_t* mtr) /* in: mtr */ 00940 { 00941 page_copy_rec_list_start(new_page, page, split_rec, index, mtr); 00942 00943 page_delete_rec_list_start(page, split_rec, index, mtr); 00944 } 00945 00946 /*************************************************************************** 00947 This is a low-level operation which is used in a database index creation 00948 to update the page number of a created B-tree to a data dictionary record. */ 00949 00950 void 00951 page_rec_write_index_page_no( 00952 /*=========================*/ 00953 rec_t* rec, /* in: record to update */ 00954 ulint i, /* in: index of the field to update */ 00955 ulint page_no,/* in: value to write */ 00956 mtr_t* mtr) /* in: mtr */ 00957 { 00958 byte* data; 00959 ulint len; 00960 00961 data = rec_get_nth_field_old(rec, i, &len); 00962 00963 ut_ad(len == 4); 00964 00965 mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr); 00966 } 00967 00968 /****************************************************************** 00969 Used to delete n slots from the directory. This function updates 00970 also n_owned fields in the records, so that the first slot after 00971 the deleted ones inherits the records of the deleted slots. */ 00972 UNIV_INLINE 00973 void 00974 page_dir_delete_slots( 00975 /*==================*/ 00976 page_t* page, /* in: the index page */ 00977 ulint start, /* in: first slot to be deleted */ 00978 ulint n) /* in: number of slots to delete (currently 00979 only n == 1 allowed) */ 00980 { 00981 page_dir_slot_t* slot; 00982 ulint i; 00983 ulint sum_owned = 0; 00984 ulint n_slots; 00985 rec_t* rec; 00986 00987 ut_ad(n == 1); 00988 ut_ad(start > 0); 00989 ut_ad(start + n < page_dir_get_n_slots(page)); 00990 00991 n_slots = page_dir_get_n_slots(page); 00992 00993 /* 1. Reset the n_owned fields of the slots to be 00994 deleted */ 00995 for (i = start; i < start + n; i++) { 00996 slot = page_dir_get_nth_slot(page, i); 00997 sum_owned += page_dir_slot_get_n_owned(slot); 00998 page_dir_slot_set_n_owned(slot, 0); 00999 } 01000 01001 /* 2. Update the n_owned value of the first non-deleted slot */ 01002 01003 slot = page_dir_get_nth_slot(page, start + n); 01004 page_dir_slot_set_n_owned(slot, 01005 sum_owned + page_dir_slot_get_n_owned(slot)); 01006 01007 /* 3. Destroy start and other slots by copying slots */ 01008 for (i = start + n; i < n_slots; i++) { 01009 slot = page_dir_get_nth_slot(page, i); 01010 rec = page_dir_slot_get_rec(slot); 01011 01012 slot = page_dir_get_nth_slot(page, i - n); 01013 page_dir_slot_set_rec(slot, rec); 01014 } 01015 01016 /* 4. Update the page header */ 01017 page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n); 01018 } 01019 01020 /****************************************************************** 01021 Used to add n slots to the directory. Does not set the record pointers 01022 in the added slots or update n_owned values: this is the responsibility 01023 of the caller. */ 01024 UNIV_INLINE 01025 void 01026 page_dir_add_slots( 01027 /*===============*/ 01028 page_t* page, /* in: the index page */ 01029 ulint start, /* in: the slot above which the new slots are added */ 01030 ulint n) /* in: number of slots to add (currently only n == 1 01031 allowed) */ 01032 { 01033 page_dir_slot_t* slot; 01034 ulint n_slots; 01035 ulint i; 01036 rec_t* rec; 01037 01038 ut_ad(n == 1); 01039 01040 n_slots = page_dir_get_n_slots(page); 01041 01042 ut_ad(start < n_slots - 1); 01043 01044 /* Update the page header */ 01045 page_dir_set_n_slots(page, n_slots + n); 01046 01047 /* Move slots up */ 01048 01049 for (i = n_slots - 1; i > start; i--) { 01050 01051 slot = page_dir_get_nth_slot(page, i); 01052 rec = page_dir_slot_get_rec(slot); 01053 01054 slot = page_dir_get_nth_slot(page, i + n); 01055 page_dir_slot_set_rec(slot, rec); 01056 } 01057 } 01058 01059 /******************************************************************** 01060 Splits a directory slot which owns too many records. */ 01061 01062 void 01063 page_dir_split_slot( 01064 /*================*/ 01065 page_t* page, /* in: the index page in question */ 01066 ulint slot_no) /* in: the directory slot */ 01067 { 01068 rec_t* rec; 01069 page_dir_slot_t* new_slot; 01070 page_dir_slot_t* prev_slot; 01071 page_dir_slot_t* slot; 01072 ulint i; 01073 ulint n_owned; 01074 01075 ut_ad(page); 01076 ut_ad(slot_no > 0); 01077 01078 slot = page_dir_get_nth_slot(page, slot_no); 01079 01080 n_owned = page_dir_slot_get_n_owned(slot); 01081 ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1); 01082 01083 /* 1. We loop to find a record approximately in the middle of the 01084 records owned by the slot. */ 01085 01086 prev_slot = page_dir_get_nth_slot(page, slot_no - 1); 01087 rec = page_dir_slot_get_rec(prev_slot); 01088 01089 for (i = 0; i < n_owned / 2; i++) { 01090 rec = page_rec_get_next(rec); 01091 } 01092 01093 ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED); 01094 01095 /* 2. We add one directory slot immediately below the slot to be 01096 split. */ 01097 01098 page_dir_add_slots(page, slot_no - 1, 1); 01099 01100 /* The added slot is now number slot_no, and the old slot is 01101 now number slot_no + 1 */ 01102 01103 new_slot = page_dir_get_nth_slot(page, slot_no); 01104 slot = page_dir_get_nth_slot(page, slot_no + 1); 01105 01106 /* 3. We store the appropriate values to the new slot. */ 01107 01108 page_dir_slot_set_rec(new_slot, rec); 01109 page_dir_slot_set_n_owned(new_slot, n_owned / 2); 01110 01111 /* 4. Finally, we update the number of records field of the 01112 original slot */ 01113 01114 page_dir_slot_set_n_owned(slot, n_owned - (n_owned / 2)); 01115 } 01116 01117 /***************************************************************** 01118 Tries to balance the given directory slot with too few records with the upper 01119 neighbor, so that there are at least the minimum number of records owned by 01120 the slot; this may result in the merging of two slots. */ 01121 01122 void 01123 page_dir_balance_slot( 01124 /*==================*/ 01125 page_t* page, /* in: index page */ 01126 ulint slot_no) /* in: the directory slot */ 01127 { 01128 page_dir_slot_t* slot; 01129 page_dir_slot_t* up_slot; 01130 ulint n_owned; 01131 ulint up_n_owned; 01132 rec_t* old_rec; 01133 rec_t* new_rec; 01134 01135 ut_ad(page); 01136 ut_ad(slot_no > 0); 01137 01138 slot = page_dir_get_nth_slot(page, slot_no); 01139 01140 /* The last directory slot cannot be balanced with the upper 01141 neighbor, as there is none. */ 01142 01143 if (slot_no == page_dir_get_n_slots(page) - 1) { 01144 01145 return; 01146 } 01147 01148 up_slot = page_dir_get_nth_slot(page, slot_no + 1); 01149 01150 n_owned = page_dir_slot_get_n_owned(slot); 01151 up_n_owned = page_dir_slot_get_n_owned(up_slot); 01152 01153 ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1); 01154 01155 /* If the upper slot has the minimum value of n_owned, we will merge 01156 the two slots, therefore we assert: */ 01157 ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED); 01158 01159 if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) { 01160 01161 /* In this case we can just transfer one record owned 01162 by the upper slot to the property of the lower slot */ 01163 old_rec = page_dir_slot_get_rec(slot); 01164 new_rec = page_rec_get_next(old_rec); 01165 01166 rec_set_n_owned(old_rec, page_is_comp(page), 0); 01167 rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1); 01168 01169 page_dir_slot_set_rec(slot, new_rec); 01170 01171 page_dir_slot_set_n_owned(up_slot, up_n_owned -1); 01172 } else { 01173 /* In this case we may merge the two slots */ 01174 page_dir_delete_slots(page, slot_no, 1); 01175 } 01176 } 01177 01178 /**************************************************************** 01179 Returns the middle record of the record list. If there are an even number 01180 of records in the list, returns the first record of the upper half-list. */ 01181 01182 rec_t* 01183 page_get_middle_rec( 01184 /*================*/ 01185 /* out: middle record */ 01186 page_t* page) /* in: page */ 01187 { 01188 page_dir_slot_t* slot; 01189 ulint middle; 01190 ulint i; 01191 ulint n_owned; 01192 ulint count; 01193 rec_t* rec; 01194 01195 /* This many records we must leave behind */ 01196 middle = (page_get_n_recs(page) + 2) / 2; 01197 01198 count = 0; 01199 01200 for (i = 0;; i++) { 01201 01202 slot = page_dir_get_nth_slot(page, i); 01203 n_owned = page_dir_slot_get_n_owned(slot); 01204 01205 if (count + n_owned > middle) { 01206 break; 01207 } else { 01208 count += n_owned; 01209 } 01210 } 01211 01212 ut_ad(i > 0); 01213 slot = page_dir_get_nth_slot(page, i - 1); 01214 rec = page_dir_slot_get_rec(slot); 01215 rec = page_rec_get_next(rec); 01216 01217 /* There are now count records behind rec */ 01218 01219 for (i = 0; i < middle - count; i++) { 01220 rec = page_rec_get_next(rec); 01221 } 01222 01223 return(rec); 01224 } 01225 01226 /******************************************************************* 01227 Returns the number of records before the given record in chain. 01228 The number includes infimum and supremum records. */ 01229 01230 ulint 01231 page_rec_get_n_recs_before( 01232 /*=======================*/ 01233 /* out: number of records */ 01234 rec_t* rec) /* in: the physical record */ 01235 { 01236 page_dir_slot_t* slot; 01237 rec_t* slot_rec; 01238 page_t* page; 01239 ulint i; 01240 ulint comp; 01241 lint n = 0; 01242 01243 ut_ad(page_rec_check(rec)); 01244 01245 page = buf_frame_align(rec); 01246 comp = page_is_comp(page); 01247 01248 while (rec_get_n_owned(rec, comp) == 0) { 01249 01250 rec = page_rec_get_next(rec); 01251 n--; 01252 } 01253 01254 for (i = 0; ; i++) { 01255 slot = page_dir_get_nth_slot(page, i); 01256 slot_rec = page_dir_slot_get_rec(slot); 01257 01258 n += rec_get_n_owned(slot_rec, comp); 01259 01260 if (rec == slot_rec) { 01261 01262 break; 01263 } 01264 } 01265 01266 n--; 01267 01268 ut_ad(n >= 0); 01269 01270 return((ulint) n); 01271 } 01272 01273 /**************************************************************** 01274 Prints record contents including the data relevant only in 01275 the index page context. */ 01276 01277 void 01278 page_rec_print( 01279 /*===========*/ 01280 rec_t* rec, /* in: physical record */ 01281 const ulint* offsets)/* in: record descriptor */ 01282 { 01283 ulint comp = page_is_comp(buf_frame_align(rec)); 01284 01285 ut_a(!comp == !rec_offs_comp(offsets)); 01286 rec_print_new(stderr, rec, offsets); 01287 fprintf(stderr, 01288 " n_owned: %lu; heap_no: %lu; next rec: %lu\n", 01289 (ulong) rec_get_n_owned(rec, comp), 01290 (ulong) rec_get_heap_no(rec, comp), 01291 (ulong) rec_get_next_offs(rec, comp)); 01292 01293 page_rec_check(rec); 01294 rec_validate(rec, offsets); 01295 } 01296 01297 /******************************************************************* 01298 This is used to print the contents of the directory for 01299 debugging purposes. */ 01300 01301 void 01302 page_dir_print( 01303 /*===========*/ 01304 page_t* page, /* in: index page */ 01305 ulint pr_n) /* in: print n first and n last entries */ 01306 { 01307 ulint n; 01308 ulint i; 01309 page_dir_slot_t* slot; 01310 01311 n = page_dir_get_n_slots(page); 01312 01313 fprintf(stderr, "--------------------------------\n" 01314 "PAGE DIRECTORY\n" 01315 "Page address %p\n" 01316 "Directory stack top at offs: %lu; number of slots: %lu\n", 01317 page, (ulong)(page_dir_get_nth_slot(page, n - 1) - page), (ulong) n); 01318 for (i = 0; i < n; i++) { 01319 slot = page_dir_get_nth_slot(page, i); 01320 if ((i == pr_n) && (i < n - pr_n)) { 01321 fputs(" ... \n", stderr); 01322 } 01323 if ((i < pr_n) || (i >= n - pr_n)) { 01324 fprintf(stderr, 01325 "Contents of slot: %lu: n_owned: %lu, rec offs: %lu\n", 01326 (ulong) i, (ulong) page_dir_slot_get_n_owned(slot), 01327 (ulong)(page_dir_slot_get_rec(slot) - page)); 01328 } 01329 } 01330 fprintf(stderr, "Total of %lu records\n" 01331 "--------------------------------\n", 01332 (ulong) (2 + page_get_n_recs(page))); 01333 } 01334 01335 /******************************************************************* 01336 This is used to print the contents of the page record list for 01337 debugging purposes. */ 01338 01339 void 01340 page_print_list( 01341 /*============*/ 01342 page_t* page, /* in: index page */ 01343 dict_index_t* index, /* in: dictionary index of the page */ 01344 ulint pr_n) /* in: print n first and n last entries */ 01345 { 01346 page_cur_t cur; 01347 ulint count; 01348 ulint n_recs; 01349 mem_heap_t* heap = NULL; 01350 ulint offsets_[REC_OFFS_NORMAL_SIZE]; 01351 ulint* offsets = offsets_; 01352 *offsets_ = (sizeof offsets_) / sizeof *offsets_; 01353 01354 ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table)); 01355 01356 fprintf(stderr, 01357 "--------------------------------\n" 01358 "PAGE RECORD LIST\n" 01359 "Page address %p\n", page); 01360 01361 n_recs = page_get_n_recs(page); 01362 01363 page_cur_set_before_first(page, &cur); 01364 count = 0; 01365 for (;;) { 01366 offsets = rec_get_offsets(cur.rec, index, offsets, 01367 ULINT_UNDEFINED, &heap); 01368 page_rec_print(cur.rec, offsets); 01369 01370 if (count == pr_n) { 01371 break; 01372 } 01373 if (page_cur_is_after_last(&cur)) { 01374 break; 01375 } 01376 page_cur_move_to_next(&cur); 01377 count++; 01378 } 01379 01380 if (n_recs > 2 * pr_n) { 01381 fputs(" ... \n", stderr); 01382 } 01383 01384 while (!page_cur_is_after_last(&cur)) { 01385 page_cur_move_to_next(&cur); 01386 01387 if (count + pr_n >= n_recs) { 01388 offsets = rec_get_offsets(cur.rec, index, offsets, 01389 ULINT_UNDEFINED, &heap); 01390 page_rec_print(cur.rec, offsets); 01391 } 01392 count++; 01393 } 01394 01395 fprintf(stderr, 01396 "Total of %lu records \n" 01397 "--------------------------------\n", 01398 (ulong) (count + 1)); 01399 01400 if (UNIV_LIKELY_NULL(heap)) { 01401 mem_heap_free(heap); 01402 } 01403 } 01404 01405 /******************************************************************* 01406 Prints the info in a page header. */ 01407 01408 void 01409 page_header_print( 01410 /*==============*/ 01411 page_t* page) 01412 { 01413 fprintf(stderr, 01414 "--------------------------------\n" 01415 "PAGE HEADER INFO\n" 01416 "Page address %p, n records %lu (%s)\n" 01417 "n dir slots %lu, heap top %lu\n" 01418 "Page n heap %lu, free %lu, garbage %lu\n" 01419 "Page last insert %lu, direction %lu, n direction %lu\n", 01420 page, (ulong) page_header_get_field(page, PAGE_N_RECS), 01421 page_is_comp(page) ? "compact format" : "original format", 01422 (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS), 01423 (ulong) page_header_get_field(page, PAGE_HEAP_TOP), 01424 (ulong) page_dir_get_n_heap(page), 01425 (ulong) page_header_get_field(page, PAGE_FREE), 01426 (ulong) page_header_get_field(page, PAGE_GARBAGE), 01427 (ulong) page_header_get_field(page, PAGE_LAST_INSERT), 01428 (ulong) page_header_get_field(page, PAGE_DIRECTION), 01429 (ulong) page_header_get_field(page, PAGE_N_DIRECTION)); 01430 } 01431 01432 /******************************************************************* 01433 This is used to print the contents of the page for 01434 debugging purposes. */ 01435 01436 void 01437 page_print( 01438 /*=======*/ 01439 page_t* page, /* in: index page */ 01440 dict_index_t* index, /* in: dictionary index of the page */ 01441 ulint dn, /* in: print dn first and last entries 01442 in directory */ 01443 ulint rn) /* in: print rn first and last records 01444 in directory */ 01445 { 01446 page_header_print(page); 01447 page_dir_print(page, dn); 01448 page_print_list(page, index, rn); 01449 } 01450 01451 /******************************************************************* 01452 The following is used to validate a record on a page. This function 01453 differs from rec_validate as it can also check the n_owned field and 01454 the heap_no field. */ 01455 01456 ibool 01457 page_rec_validate( 01458 /*==============*/ 01459 /* out: TRUE if ok */ 01460 rec_t* rec, /* in: physical record */ 01461 const ulint* offsets)/* in: array returned by rec_get_offsets() */ 01462 { 01463 ulint n_owned; 01464 ulint heap_no; 01465 page_t* page; 01466 ulint comp; 01467 01468 page = buf_frame_align(rec); 01469 comp = page_is_comp(page); 01470 ut_a(!comp == !rec_offs_comp(offsets)); 01471 01472 page_rec_check(rec); 01473 rec_validate(rec, offsets); 01474 01475 n_owned = rec_get_n_owned(rec, comp); 01476 heap_no = rec_get_heap_no(rec, comp); 01477 01478 if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) { 01479 fprintf(stderr, 01480 "InnoDB: Dir slot of rec %lu, n owned too big %lu\n", 01481 (ulong)(rec - page), (ulong) n_owned); 01482 return(FALSE); 01483 } 01484 01485 if (!(heap_no < page_dir_get_n_heap(page))) { 01486 fprintf(stderr, 01487 "InnoDB: Heap no of rec %lu too big %lu %lu\n", 01488 (ulong)(rec - page), (ulong) heap_no, 01489 (ulong) page_dir_get_n_heap(page)); 01490 return(FALSE); 01491 } 01492 01493 return(TRUE); 01494 } 01495 01496 /******************************************************************* 01497 Checks that the first directory slot points to the infimum record and 01498 the last to the supremum. This function is intended to track if the 01499 bug fixed in 4.0.14 has caused corruption to users' databases. */ 01500 01501 void 01502 page_check_dir( 01503 /*===========*/ 01504 page_t* page) /* in: index page */ 01505 { 01506 ulint n_slots; 01507 01508 n_slots = page_dir_get_n_slots(page); 01509 01510 if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0)) 01511 != page_get_infimum_rec(page)) { 01512 01513 fprintf(stderr, 01514 "InnoDB: Page directory corruption: supremum not pointed to\n"); 01515 buf_page_print(page); 01516 } 01517 01518 if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, n_slots - 1)) 01519 != page_get_supremum_rec(page)) { 01520 01521 fprintf(stderr, 01522 "InnoDB: Page directory corruption: supremum not pointed to\n"); 01523 buf_page_print(page); 01524 } 01525 } 01526 01527 /******************************************************************* 01528 This function checks the consistency of an index page when we do not 01529 know the index. This is also resilient so that this should never crash 01530 even if the page is total garbage. */ 01531 01532 ibool 01533 page_simple_validate( 01534 /*=================*/ 01535 /* out: TRUE if ok */ 01536 page_t* page) /* in: index page */ 01537 { 01538 page_cur_t cur; 01539 page_dir_slot_t* slot; 01540 ulint slot_no; 01541 ulint n_slots; 01542 rec_t* rec; 01543 byte* rec_heap_top; 01544 ulint count; 01545 ulint own_count; 01546 ibool ret = FALSE; 01547 ulint comp = page_is_comp(page); 01548 01549 /* Check first that the record heap and the directory do not 01550 overlap. */ 01551 01552 n_slots = page_dir_get_n_slots(page); 01553 01554 if (n_slots > UNIV_PAGE_SIZE / 4) { 01555 fprintf(stderr, 01556 "InnoDB: Nonsensical number %lu of page dir slots\n", (ulong) n_slots); 01557 01558 goto func_exit; 01559 } 01560 01561 rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP); 01562 01563 if (rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1)) { 01564 01565 fprintf(stderr, 01566 "InnoDB: Record heap and dir overlap on a page, heap top %lu, dir %lu\n", 01567 (ulong)(page_header_get_ptr(page, PAGE_HEAP_TOP) - page), 01568 (ulong)(page_dir_get_nth_slot(page, n_slots - 1) - page)); 01569 01570 goto func_exit; 01571 } 01572 01573 /* Validate the record list in a loop checking also that it is 01574 consistent with the page record directory. */ 01575 01576 count = 0; 01577 own_count = 1; 01578 slot_no = 0; 01579 slot = page_dir_get_nth_slot(page, slot_no); 01580 01581 page_cur_set_before_first(page, &cur); 01582 01583 for (;;) { 01584 rec = (&cur)->rec; 01585 01586 if (rec > rec_heap_top) { 01587 fprintf(stderr, 01588 "InnoDB: Record %lu is above rec heap top %lu\n", 01589 (ulong)(rec - page), (ulong)(rec_heap_top - page)); 01590 01591 goto func_exit; 01592 } 01593 01594 if (rec_get_n_owned(rec, comp) != 0) { 01595 /* This is a record pointed to by a dir slot */ 01596 if (rec_get_n_owned(rec, comp) != own_count) { 01597 01598 fprintf(stderr, 01599 "InnoDB: Wrong owned count %lu, %lu, rec %lu\n", 01600 (ulong) rec_get_n_owned(rec, comp), 01601 (ulong) own_count, 01602 (ulong)(rec - page)); 01603 01604 goto func_exit; 01605 } 01606 01607 if (page_dir_slot_get_rec(slot) != rec) { 01608 fprintf(stderr, 01609 "InnoDB: Dir slot does not point to right rec %lu\n", 01610 (ulong)(rec - page)); 01611 01612 goto func_exit; 01613 } 01614 01615 own_count = 0; 01616 01617 if (!page_cur_is_after_last(&cur)) { 01618 slot_no++; 01619 slot = page_dir_get_nth_slot(page, slot_no); 01620 } 01621 } 01622 01623 if (page_cur_is_after_last(&cur)) { 01624 01625 break; 01626 } 01627 01628 if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA 01629 || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) { 01630 fprintf(stderr, 01631 "InnoDB: Next record offset nonsensical %lu for rec %lu\n", 01632 (ulong) rec_get_next_offs(rec, comp), 01633 (ulong)(rec - page)); 01634 01635 goto func_exit; 01636 } 01637 01638 count++; 01639 01640 if (count > UNIV_PAGE_SIZE) { 01641 fprintf(stderr, 01642 "InnoDB: Page record list appears to be circular %lu\n", 01643 (ulong) count); 01644 goto func_exit; 01645 } 01646 01647 page_cur_move_to_next(&cur); 01648 own_count++; 01649 } 01650 01651 if (rec_get_n_owned(rec, comp) == 0) { 01652 fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n"); 01653 01654 goto func_exit; 01655 } 01656 01657 if (slot_no != n_slots - 1) { 01658 fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n", 01659 (ulong) slot_no, (ulong) (n_slots - 1)); 01660 goto func_exit; 01661 } 01662 01663 if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) { 01664 fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", 01665 (ulong) page_header_get_field(page, PAGE_N_RECS) + 2, 01666 (ulong) (count + 1)); 01667 01668 goto func_exit; 01669 } 01670 01671 /* Check then the free list */ 01672 rec = page_header_get_ptr(page, PAGE_FREE); 01673 01674 while (rec != NULL) { 01675 if (rec < page + FIL_PAGE_DATA 01676 || rec >= page + UNIV_PAGE_SIZE) { 01677 fprintf(stderr, 01678 "InnoDB: Free list record has a nonsensical offset %lu\n", 01679 (ulong)(rec - page)); 01680 01681 goto func_exit; 01682 } 01683 01684 if (rec > rec_heap_top) { 01685 fprintf(stderr, 01686 "InnoDB: Free list record %lu is above rec heap top %lu\n", 01687 (ulong)(rec - page), (ulong)(rec_heap_top - page)); 01688 01689 goto func_exit; 01690 } 01691 01692 count++; 01693 01694 if (count > UNIV_PAGE_SIZE) { 01695 fprintf(stderr, 01696 "InnoDB: Page free list appears to be circular %lu\n", 01697 (ulong) count); 01698 goto func_exit; 01699 } 01700 01701 rec = page_rec_get_next(rec); 01702 } 01703 01704 if (page_dir_get_n_heap(page) != count + 1) { 01705 01706 fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n", 01707 (ulong) page_dir_get_n_heap(page), 01708 (ulong) (count + 1)); 01709 01710 goto func_exit; 01711 } 01712 01713 ret = TRUE; 01714 01715 func_exit: 01716 return(ret); 01717 } 01718 01719 /******************************************************************* 01720 This function checks the consistency of an index page. */ 01721 01722 ibool 01723 page_validate( 01724 /*==========*/ 01725 /* out: TRUE if ok */ 01726 page_t* page, /* in: index page */ 01727 dict_index_t* index) /* in: data dictionary index containing 01728 the page record type definition */ 01729 { 01730 page_dir_slot_t* slot; 01731 mem_heap_t* heap; 01732 page_cur_t cur; 01733 byte* buf; 01734 ulint count; 01735 ulint own_count; 01736 ulint slot_no; 01737 ulint data_size; 01738 rec_t* rec; 01739 rec_t* old_rec = NULL; 01740 ulint offs; 01741 ulint n_slots; 01742 ibool ret = FALSE; 01743 ulint i; 01744 ulint comp = page_is_comp(page); 01745 ulint* offsets = NULL; 01746 ulint* old_offsets = NULL; 01747 01748 if ((ibool)!!comp != dict_table_is_comp(index->table)) { 01749 fputs("InnoDB: 'compact format' flag mismatch\n", stderr); 01750 goto func_exit2; 01751 } 01752 if (!page_simple_validate(page)) { 01753 goto func_exit2; 01754 } 01755 01756 heap = mem_heap_create(UNIV_PAGE_SIZE + 200); 01757 01758 /* The following buffer is used to check that the 01759 records in the page record heap do not overlap */ 01760 01761 buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE); 01762 memset(buf, 0, UNIV_PAGE_SIZE); 01763 01764 /* Check first that the record heap and the directory do not 01765 overlap. */ 01766 01767 n_slots = page_dir_get_n_slots(page); 01768 01769 if (!(page_header_get_ptr(page, PAGE_HEAP_TOP) <= 01770 page_dir_get_nth_slot(page, n_slots - 1))) { 01771 01772 fputs("InnoDB: Record heap and dir overlap on a page ", 01773 stderr); 01774 dict_index_name_print(stderr, NULL, index); 01775 fprintf(stderr, ", %p, %p\n", 01776 page_header_get_ptr(page, PAGE_HEAP_TOP), 01777 page_dir_get_nth_slot(page, n_slots - 1)); 01778 01779 goto func_exit; 01780 } 01781 01782 /* Validate the record list in a loop checking also that 01783 it is consistent with the directory. */ 01784 count = 0; 01785 data_size = 0; 01786 own_count = 1; 01787 slot_no = 0; 01788 slot = page_dir_get_nth_slot(page, slot_no); 01789 01790 page_cur_set_before_first(page, &cur); 01791 01792 for (;;) { 01793 rec = cur.rec; 01794 offsets = rec_get_offsets(rec, index, offsets, 01795 ULINT_UNDEFINED, &heap); 01796 01797 if (comp && page_rec_is_user_rec(rec) 01798 && rec_get_node_ptr_flag(rec) 01799 != (ibool) 01800 (btr_page_get_level_low(page) != 0)) { 01801 fputs("InnoDB: node_ptr flag mismatch\n", stderr); 01802 goto func_exit; 01803 } 01804 01805 if (!page_rec_validate(rec, offsets)) { 01806 goto func_exit; 01807 } 01808 01809 /* Check that the records are in the ascending order */ 01810 if ((count >= 2) && (!page_cur_is_after_last(&cur))) { 01811 if (!(1 == cmp_rec_rec(rec, old_rec, 01812 offsets, old_offsets, index))) { 01813 fprintf(stderr, 01814 "InnoDB: Records in wrong order on page %lu", 01815 (ulong) buf_frame_get_page_no(page)); 01816 dict_index_name_print(stderr, NULL, index); 01817 fputs("\nInnoDB: previous record ", stderr); 01818 rec_print_new(stderr, old_rec, old_offsets); 01819 fputs("\nInnoDB: record ", stderr); 01820 rec_print_new(stderr, rec, offsets); 01821 putc('\n', stderr); 01822 01823 goto func_exit; 01824 } 01825 } 01826 01827 if (page_rec_is_user_rec(rec)) { 01828 01829 data_size += rec_offs_size(offsets); 01830 } 01831 01832 offs = rec_get_start(rec, offsets) - page; 01833 01834 for (i = 0; i < rec_offs_size(offsets); i++) { 01835 if (!buf[offs + i] == 0) { 01836 /* No other record may overlap this */ 01837 01838 fputs("InnoDB: Record overlaps another\n", 01839 stderr); 01840 goto func_exit; 01841 } 01842 01843 buf[offs + i] = 1; 01844 } 01845 01846 if (rec_get_n_owned(rec, comp) != 0) { 01847 /* This is a record pointed to by a dir slot */ 01848 if (rec_get_n_owned(rec, comp) != own_count) { 01849 fprintf(stderr, 01850 "InnoDB: Wrong owned count %lu, %lu\n", 01851 (ulong) rec_get_n_owned(rec, comp), 01852 (ulong) own_count); 01853 goto func_exit; 01854 } 01855 01856 if (page_dir_slot_get_rec(slot) != rec) { 01857 fputs( 01858 "InnoDB: Dir slot does not point to right rec\n", 01859 stderr); 01860 goto func_exit; 01861 } 01862 01863 page_dir_slot_check(slot); 01864 01865 own_count = 0; 01866 if (!page_cur_is_after_last(&cur)) { 01867 slot_no++; 01868 slot = page_dir_get_nth_slot(page, slot_no); 01869 } 01870 } 01871 01872 if (page_cur_is_after_last(&cur)) { 01873 break; 01874 } 01875 01876 if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA 01877 || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) { 01878 fprintf(stderr, 01879 "InnoDB: Next record offset wrong %lu\n", 01880 (ulong) rec_get_next_offs(rec, comp)); 01881 goto func_exit; 01882 } 01883 01884 count++; 01885 page_cur_move_to_next(&cur); 01886 own_count++; 01887 old_rec = rec; 01888 /* set old_offsets to offsets; recycle offsets */ 01889 { 01890 ulint* offs = old_offsets; 01891 old_offsets = offsets; 01892 offsets = offs; 01893 } 01894 } 01895 01896 if (rec_get_n_owned(rec, comp) == 0) { 01897 fputs("InnoDB: n owned is zero\n", stderr); 01898 goto func_exit; 01899 } 01900 01901 if (slot_no != n_slots - 1) { 01902 fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n", 01903 (ulong) slot_no, (ulong) (n_slots - 1)); 01904 goto func_exit; 01905 } 01906 01907 if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) { 01908 fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", 01909 (ulong) page_header_get_field(page, PAGE_N_RECS) + 2, 01910 (ulong) (count + 1)); 01911 goto func_exit; 01912 } 01913 01914 if (data_size != page_get_data_size(page)) { 01915 fprintf(stderr, 01916 "InnoDB: Summed data size %lu, returned by func %lu\n", 01917 (ulong) data_size, (ulong) page_get_data_size(page)); 01918 goto func_exit; 01919 } 01920 01921 /* Check then the free list */ 01922 rec = page_header_get_ptr(page, PAGE_FREE); 01923 01924 while (rec != NULL) { 01925 offsets = rec_get_offsets(rec, index, offsets, 01926 ULINT_UNDEFINED, &heap); 01927 if (!page_rec_validate(rec, offsets)) { 01928 01929 goto func_exit; 01930 } 01931 01932 count++; 01933 offs = rec_get_start(rec, offsets) - page; 01934 01935 for (i = 0; i < rec_offs_size(offsets); i++) { 01936 01937 if (buf[offs + i] != 0) { 01938 fputs( 01939 "InnoDB: Record overlaps another in free list\n", stderr); 01940 goto func_exit; 01941 } 01942 01943 buf[offs + i] = 1; 01944 } 01945 01946 rec = page_rec_get_next(rec); 01947 } 01948 01949 if (page_dir_get_n_heap(page) != count + 1) { 01950 fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n", 01951 (ulong) page_dir_get_n_heap(page), 01952 (ulong) count + 1); 01953 goto func_exit; 01954 } 01955 01956 ret = TRUE; 01957 01958 func_exit: 01959 mem_heap_free(heap); 01960 01961 if (ret == FALSE) { 01962 func_exit2: 01963 fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ", 01964 (ulong) buf_frame_get_page_no(page)); 01965 dict_index_name_print(stderr, NULL, index); 01966 putc('\n', stderr); 01967 buf_page_print(page); 01968 } 01969 01970 return(ret); 01971 } 01972 01973 /******************************************************************* 01974 Looks in the page record list for a record with the given heap number. */ 01975 01976 rec_t* 01977 page_find_rec_with_heap_no( 01978 /*=======================*/ 01979 /* out: record, NULL if not found */ 01980 page_t* page, /* in: index page */ 01981 ulint heap_no)/* in: heap number */ 01982 { 01983 page_cur_t cur; 01984 01985 page_cur_set_before_first(page, &cur); 01986 01987 for (;;) { 01988 if (rec_get_heap_no(cur.rec, page_is_comp(page)) == heap_no) { 01989 01990 return(cur.rec); 01991 } 01992 01993 if (page_cur_is_after_last(&cur)) { 01994 01995 return(NULL); 01996 } 01997 01998 page_cur_move_to_next(&cur); 01999 } 02000 }
1.4.7

