00001 /****************************************************** 00002 The database buffer replacement algorithm 00003 00004 (c) 1995 Innobase Oy 00005 00006 Created 11/5/1995 Heikki Tuuri 00007 *******************************************************/ 00008 00009 #include "buf0lru.h" 00010 00011 #ifdef UNIV_NONINL 00012 #include "buf0lru.ic" 00013 #include "srv0srv.h" /* Needed to getsrv_print_innodb_monitor */ 00014 #endif 00015 00016 #include "ut0byte.h" 00017 #include "ut0lst.h" 00018 #include "ut0rnd.h" 00019 #include "sync0sync.h" 00020 #include "sync0rw.h" 00021 #include "hash0hash.h" 00022 #include "os0sync.h" 00023 #include "fil0fil.h" 00024 #include "btr0btr.h" 00025 #include "buf0buf.h" 00026 #include "buf0flu.h" 00027 #include "buf0rea.h" 00028 #include "btr0sea.h" 00029 #include "os0file.h" 00030 #include "log0recv.h" 00031 00032 /* The number of blocks from the LRU_old pointer onward, including the block 00033 pointed to, must be 3/8 of the whole LRU list length, except that the 00034 tolerance defined below is allowed. Note that the tolerance must be small 00035 enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the 00036 LRU_old pointer is not allowed to point to either end of the LRU list. */ 00037 00038 #define BUF_LRU_OLD_TOLERANCE 20 00039 00040 /* The whole LRU list length is divided by this number to determine an 00041 initial segment in buf_LRU_get_recent_limit */ 00042 00043 #define BUF_LRU_INITIAL_RATIO 8 00044 00045 /* If we switch on the InnoDB monitor because there are too few available 00046 frames in the buffer pool, we set this to TRUE */ 00047 ibool buf_lru_switched_on_innodb_mon = FALSE; 00048 00049 /********************************************************************** 00050 Takes a block out of the LRU list and page hash table and sets the block 00051 state to BUF_BLOCK_REMOVE_HASH. 
*/ 00052 static 00053 void 00054 buf_LRU_block_remove_hashed_page( 00055 /*=============================*/ 00056 buf_block_t* block); /* in: block, must contain a file page and 00057 be in a state where it can be freed; there 00058 may or may not be a hash index to the page */ 00059 /********************************************************************** 00060 Puts a file page whose has no hash index to the free list. */ 00061 static 00062 void 00063 buf_LRU_block_free_hashed_page( 00064 /*===========================*/ 00065 buf_block_t* block); /* in: block, must contain a file page and 00066 be in a state where it can be freed */ 00067 00068 /********************************************************************** 00069 Invalidates all pages belonging to a given tablespace when we are deleting 00070 the data file(s) of that tablespace. */ 00071 00072 void 00073 buf_LRU_invalidate_tablespace( 00074 /*==========================*/ 00075 ulint id) /* in: space id */ 00076 { 00077 buf_block_t* block; 00078 ulint page_no; 00079 ibool all_freed; 00080 00081 scan_again: 00082 mutex_enter(&(buf_pool->mutex)); 00083 00084 all_freed = TRUE; 00085 00086 block = UT_LIST_GET_LAST(buf_pool->LRU); 00087 00088 while (block != NULL) { 00089 ut_a(block->state == BUF_BLOCK_FILE_PAGE); 00090 00091 if (block->space == id 00092 && (block->buf_fix_count > 0 || block->io_fix != 0)) { 00093 00094 /* We cannot remove this page during this scan yet; 00095 maybe the system is currently reading it in, or 00096 flushing the modifications to the file */ 00097 00098 all_freed = FALSE; 00099 00100 goto next_page; 00101 } 00102 00103 if (block->space == id) { 00104 #ifdef UNIV_DEBUG 00105 if (buf_debug_prints) { 00106 printf( 00107 "Dropping space %lu page %lu\n", 00108 (ulong) block->space, 00109 (ulong) block->offset); 00110 } 00111 #endif 00112 if (block->is_hashed) { 00113 page_no = block->offset; 00114 00115 mutex_exit(&(buf_pool->mutex)); 00116 00117 /* Note that the following call will acquire 
00118 an S-latch on the page */ 00119 00120 btr_search_drop_page_hash_when_freed(id, 00121 page_no); 00122 goto scan_again; 00123 } 00124 00125 if (0 != ut_dulint_cmp(block->oldest_modification, 00126 ut_dulint_zero)) { 00127 00128 /* Remove from the flush list of modified 00129 blocks */ 00130 block->oldest_modification = ut_dulint_zero; 00131 00132 UT_LIST_REMOVE(flush_list, 00133 buf_pool->flush_list, block); 00134 } 00135 00136 /* Remove from the LRU list */ 00137 buf_LRU_block_remove_hashed_page(block); 00138 buf_LRU_block_free_hashed_page(block); 00139 } 00140 next_page: 00141 block = UT_LIST_GET_PREV(LRU, block); 00142 } 00143 00144 mutex_exit(&(buf_pool->mutex)); 00145 00146 if (!all_freed) { 00147 os_thread_sleep(20000); 00148 00149 goto scan_again; 00150 } 00151 } 00152 00153 /********************************************************************** 00154 Gets the minimum LRU_position field for the blocks in an initial segment 00155 (determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not 00156 guaranteed to be precise, because the ulint_clock may wrap around. */ 00157 00158 ulint 00159 buf_LRU_get_recent_limit(void) 00160 /*==========================*/ 00161 /* out: the limit; zero if could not determine it */ 00162 { 00163 buf_block_t* block; 00164 ulint len; 00165 ulint limit; 00166 00167 mutex_enter(&(buf_pool->mutex)); 00168 00169 len = UT_LIST_GET_LEN(buf_pool->LRU); 00170 00171 if (len < BUF_LRU_OLD_MIN_LEN) { 00172 /* The LRU list is too short to do read-ahead */ 00173 00174 mutex_exit(&(buf_pool->mutex)); 00175 00176 return(0); 00177 } 00178 00179 block = UT_LIST_GET_FIRST(buf_pool->LRU); 00180 00181 limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO; 00182 00183 mutex_exit(&(buf_pool->mutex)); 00184 00185 return(limit); 00186 } 00187 00188 /********************************************************************** 00189 Look for a replaceable block from the end of the LRU list and put it to 00190 the free list if found. 
*/ 00191 00192 ibool 00193 buf_LRU_search_and_free_block( 00194 /*==========================*/ 00195 /* out: TRUE if freed */ 00196 ulint n_iterations) /* in: how many times this has been called 00197 repeatedly without result: a high value means 00198 that we should search farther; if value is 00199 k < 10, then we only search k/10 * [number 00200 of pages in the buffer pool] from the end 00201 of the LRU list */ 00202 { 00203 buf_block_t* block; 00204 ulint distance = 0; 00205 ibool freed; 00206 00207 mutex_enter(&(buf_pool->mutex)); 00208 00209 freed = FALSE; 00210 block = UT_LIST_GET_LAST(buf_pool->LRU); 00211 00212 while (block != NULL) { 00213 ut_a(block->in_LRU_list); 00214 if (buf_flush_ready_for_replace(block)) { 00215 00216 #ifdef UNIV_DEBUG 00217 if (buf_debug_prints) { 00218 fprintf(stderr, 00219 "Putting space %lu page %lu to free list\n", 00220 (ulong) block->space, 00221 (ulong) block->offset); 00222 } 00223 #endif /* UNIV_DEBUG */ 00224 00225 buf_LRU_block_remove_hashed_page(block); 00226 00227 mutex_exit(&(buf_pool->mutex)); 00228 00229 /* Remove possible adaptive hash index built on the 00230 page; in the case of AWE the block may not have a 00231 frame at all */ 00232 00233 if (block->frame) { 00234 btr_search_drop_page_hash_index(block->frame); 00235 } 00236 mutex_enter(&(buf_pool->mutex)); 00237 00238 ut_a(block->buf_fix_count == 0); 00239 00240 buf_LRU_block_free_hashed_page(block); 00241 freed = TRUE; 00242 00243 break; 00244 } 00245 block = UT_LIST_GET_PREV(LRU, block); 00246 distance++; 00247 00248 if (!freed && n_iterations <= 10 00249 && distance > 100 + (n_iterations * buf_pool->curr_size) 00250 / 10) { 00251 buf_pool->LRU_flush_ended = 0; 00252 00253 mutex_exit(&(buf_pool->mutex)); 00254 00255 return(FALSE); 00256 } 00257 } 00258 if (buf_pool->LRU_flush_ended > 0) { 00259 buf_pool->LRU_flush_ended--; 00260 } 00261 if (!freed) { 00262 buf_pool->LRU_flush_ended = 0; 00263 } 00264 mutex_exit(&(buf_pool->mutex)); 00265 00266 return(freed); 
00267 } 00268 00269 /********************************************************************** 00270 Tries to remove LRU flushed blocks from the end of the LRU list and put them 00271 to the free list. This is beneficial for the efficiency of the insert buffer 00272 operation, as flushed pages from non-unique non-clustered indexes are here 00273 taken out of the buffer pool, and their inserts redirected to the insert 00274 buffer. Otherwise, the flushed blocks could get modified again before read 00275 operations need new buffer blocks, and the i/o work done in flushing would be 00276 wasted. */ 00277 00278 void 00279 buf_LRU_try_free_flushed_blocks(void) 00280 /*=================================*/ 00281 { 00282 mutex_enter(&(buf_pool->mutex)); 00283 00284 while (buf_pool->LRU_flush_ended > 0) { 00285 00286 mutex_exit(&(buf_pool->mutex)); 00287 00288 buf_LRU_search_and_free_block(1); 00289 00290 mutex_enter(&(buf_pool->mutex)); 00291 } 00292 00293 mutex_exit(&(buf_pool->mutex)); 00294 } 00295 00296 /********************************************************************** 00297 Returns TRUE if less than 25 % of the buffer pool is available. This can be 00298 used in heuristics to prevent huge transactions eating up the whole buffer 00299 pool for their locks. */ 00300 00301 ibool 00302 buf_LRU_buf_pool_running_out(void) 00303 /*==============================*/ 00304 /* out: TRUE if less than 25 % of buffer pool 00305 left */ 00306 { 00307 ibool ret = FALSE; 00308 00309 mutex_enter(&(buf_pool->mutex)); 00310 00311 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) 00312 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) { 00313 00314 ret = TRUE; 00315 } 00316 00317 mutex_exit(&(buf_pool->mutex)); 00318 00319 return(ret); 00320 } 00321 00322 /********************************************************************** 00323 Returns a free block from buf_pool. The block is taken off the free list. 
00324 If it is empty, blocks are moved from the end of the LRU list to the free 00325 list. */ 00326 00327 buf_block_t* 00328 buf_LRU_get_free_block(void) 00329 /*========================*/ 00330 /* out: the free control block; also if AWE is 00331 used, it is guaranteed that the block has its 00332 page mapped to a frame when we return */ 00333 { 00334 buf_block_t* block = NULL; 00335 ibool freed; 00336 ulint n_iterations = 1; 00337 ibool mon_value_was = FALSE; 00338 ibool started_monitor = FALSE; 00339 loop: 00340 mutex_enter(&(buf_pool->mutex)); 00341 00342 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) 00343 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) { 00344 ut_print_timestamp(stderr); 00345 00346 fprintf(stderr, 00347 " InnoDB: ERROR: over 95 percent of the buffer pool is occupied by\n" 00348 "InnoDB: lock heaps or the adaptive hash index! Check that your\n" 00349 "InnoDB: transactions do not set too many row locks.\n" 00350 "InnoDB: Your buffer pool size is %lu MB. Maybe you should make\n" 00351 "InnoDB: the buffer pool bigger?\n" 00352 "InnoDB: We intentionally generate a seg fault to print a stack trace\n" 00353 "InnoDB: on Linux!\n", 00354 (ulong)(buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE))); 00355 00356 ut_error; 00357 00358 } else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) 00359 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 3) { 00360 00361 if (!buf_lru_switched_on_innodb_mon) { 00362 00363 /* Over 67 % of the buffer pool is occupied by lock 00364 heaps or the adaptive hash index. This may be a memory 00365 leak! */ 00366 00367 ut_print_timestamp(stderr); 00368 fprintf(stderr, 00369 " InnoDB: WARNING: over 67 percent of the buffer pool is occupied by\n" 00370 "InnoDB: lock heaps or the adaptive hash index! Check that your\n" 00371 "InnoDB: transactions do not set too many row locks.\n" 00372 "InnoDB: Your buffer pool size is %lu MB. 
Maybe you should make\n" 00373 "InnoDB: the buffer pool bigger?\n" 00374 "InnoDB: Starting the InnoDB Monitor to print diagnostics, including\n" 00375 "InnoDB: lock heap and hash index sizes.\n", 00376 (ulong) (buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE))); 00377 00378 buf_lru_switched_on_innodb_mon = TRUE; 00379 srv_print_innodb_monitor = TRUE; 00380 os_event_set(srv_lock_timeout_thread_event); 00381 } 00382 } else if (buf_lru_switched_on_innodb_mon) { 00383 00384 /* Switch off the InnoDB Monitor; this is a simple way 00385 to stop the monitor if the situation becomes less urgent, 00386 but may also surprise users if the user also switched on the 00387 monitor! */ 00388 00389 buf_lru_switched_on_innodb_mon = FALSE; 00390 srv_print_innodb_monitor = FALSE; 00391 } 00392 00393 /* If there is a block in the free list, take it */ 00394 if (UT_LIST_GET_LEN(buf_pool->free) > 0) { 00395 00396 block = UT_LIST_GET_FIRST(buf_pool->free); 00397 ut_a(block->in_free_list); 00398 UT_LIST_REMOVE(free, buf_pool->free, block); 00399 block->in_free_list = FALSE; 00400 ut_a(block->state != BUF_BLOCK_FILE_PAGE); 00401 ut_a(!block->in_LRU_list); 00402 00403 if (srv_use_awe) { 00404 if (block->frame) { 00405 /* Remove from the list of mapped pages */ 00406 00407 UT_LIST_REMOVE(awe_LRU_free_mapped, 00408 buf_pool->awe_LRU_free_mapped, block); 00409 } else { 00410 /* We map the page to a frame; second param 00411 FALSE below because we do not want it to be 00412 added to the awe_LRU_free_mapped list */ 00413 00414 buf_awe_map_page_to_frame(block, FALSE); 00415 } 00416 } 00417 00418 block->state = BUF_BLOCK_READY_FOR_USE; 00419 00420 mutex_exit(&(buf_pool->mutex)); 00421 00422 if (started_monitor) { 00423 srv_print_innodb_monitor = mon_value_was; 00424 } 00425 00426 return(block); 00427 } 00428 00429 /* If no block was in the free list, search from the end of the LRU 00430 list and try to free a block there */ 00431 00432 mutex_exit(&(buf_pool->mutex)); 00433 00434 freed = 
buf_LRU_search_and_free_block(n_iterations); 00435 00436 if (freed > 0) { 00437 goto loop; 00438 } 00439 00440 if (n_iterations > 30) { 00441 ut_print_timestamp(stderr); 00442 fprintf(stderr, 00443 "InnoDB: Warning: difficult to find free blocks from\n" 00444 "InnoDB: the buffer pool (%lu search iterations)! Consider\n" 00445 "InnoDB: increasing the buffer pool size.\n" 00446 "InnoDB: It is also possible that in your Unix version\n" 00447 "InnoDB: fsync is very slow, or completely frozen inside\n" 00448 "InnoDB: the OS kernel. Then upgrading to a newer version\n" 00449 "InnoDB: of your operating system may help. Look at the\n" 00450 "InnoDB: number of fsyncs in diagnostic info below.\n" 00451 "InnoDB: Pending flushes (fsync) log: %lu; buffer pool: %lu\n" 00452 "InnoDB: %lu OS file reads, %lu OS file writes, %lu OS fsyncs\n" 00453 "InnoDB: Starting InnoDB Monitor to print further\n" 00454 "InnoDB: diagnostics to the standard output.\n", 00455 (ulong) n_iterations, 00456 (ulong) fil_n_pending_log_flushes, 00457 (ulong) fil_n_pending_tablespace_flushes, 00458 (ulong) os_n_file_reads, (ulong) os_n_file_writes, 00459 (ulong) os_n_fsyncs); 00460 00461 mon_value_was = srv_print_innodb_monitor; 00462 started_monitor = TRUE; 00463 srv_print_innodb_monitor = TRUE; 00464 os_event_set(srv_lock_timeout_thread_event); 00465 } 00466 00467 /* No free block was found: try to flush the LRU list */ 00468 00469 buf_flush_free_margin(); 00470 ++srv_buf_pool_wait_free; 00471 00472 os_aio_simulated_wake_handler_threads(); 00473 00474 mutex_enter(&(buf_pool->mutex)); 00475 00476 if (buf_pool->LRU_flush_ended > 0) { 00477 /* We have written pages in an LRU flush. To make the insert 00478 buffer more efficient, we try to move these pages to the free 00479 list. 
*/ 00480 00481 mutex_exit(&(buf_pool->mutex)); 00482 00483 buf_LRU_try_free_flushed_blocks(); 00484 } else { 00485 mutex_exit(&(buf_pool->mutex)); 00486 } 00487 00488 if (n_iterations > 10) { 00489 00490 os_thread_sleep(500000); 00491 } 00492 00493 n_iterations++; 00494 00495 goto loop; 00496 } 00497 00498 /*********************************************************************** 00499 Moves the LRU_old pointer so that the length of the old blocks list 00500 is inside the allowed limits. */ 00501 UNIV_INLINE 00502 void 00503 buf_LRU_old_adjust_len(void) 00504 /*========================*/ 00505 { 00506 ulint old_len; 00507 ulint new_len; 00508 00509 ut_a(buf_pool->LRU_old); 00510 #ifdef UNIV_SYNC_DEBUG 00511 ut_ad(mutex_own(&(buf_pool->mutex))); 00512 #endif /* UNIV_SYNC_DEBUG */ 00513 ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5); 00514 00515 for (;;) { 00516 old_len = buf_pool->LRU_old_len; 00517 new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8); 00518 00519 ut_a(buf_pool->LRU_old->in_LRU_list); 00520 00521 /* Update the LRU_old pointer if necessary */ 00522 00523 if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) { 00524 00525 buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, 00526 buf_pool->LRU_old); 00527 (buf_pool->LRU_old)->old = TRUE; 00528 buf_pool->LRU_old_len++; 00529 00530 } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) { 00531 00532 (buf_pool->LRU_old)->old = FALSE; 00533 buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, 00534 buf_pool->LRU_old); 00535 buf_pool->LRU_old_len--; 00536 } else { 00537 ut_a(buf_pool->LRU_old); /* Check that we did not 00538 fall out of the LRU list */ 00539 return; 00540 } 00541 } 00542 } 00543 00544 /*********************************************************************** 00545 Initializes the old blocks pointer in the LRU list. This function should be 00546 called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. 
*/ 00547 static 00548 void 00549 buf_LRU_old_init(void) 00550 /*==================*/ 00551 { 00552 buf_block_t* block; 00553 00554 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN); 00555 00556 /* We first initialize all blocks in the LRU list as old and then use 00557 the adjust function to move the LRU_old pointer to the right 00558 position */ 00559 00560 block = UT_LIST_GET_FIRST(buf_pool->LRU); 00561 00562 while (block != NULL) { 00563 ut_a(block->state == BUF_BLOCK_FILE_PAGE); 00564 ut_a(block->in_LRU_list); 00565 block->old = TRUE; 00566 block = UT_LIST_GET_NEXT(LRU, block); 00567 } 00568 00569 buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU); 00570 buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU); 00571 00572 buf_LRU_old_adjust_len(); 00573 } 00574 00575 /********************************************************************** 00576 Removes a block from the LRU list. */ 00577 UNIV_INLINE 00578 void 00579 buf_LRU_remove_block( 00580 /*=================*/ 00581 buf_block_t* block) /* in: control block */ 00582 { 00583 ut_ad(buf_pool); 00584 ut_ad(block); 00585 #ifdef UNIV_SYNC_DEBUG 00586 ut_ad(mutex_own(&(buf_pool->mutex))); 00587 #endif /* UNIV_SYNC_DEBUG */ 00588 00589 ut_a(block->state == BUF_BLOCK_FILE_PAGE); 00590 ut_a(block->in_LRU_list); 00591 00592 /* If the LRU_old pointer is defined and points to just this block, 00593 move it backward one step */ 00594 00595 if (block == buf_pool->LRU_old) { 00596 00597 /* Below: the previous block is guaranteed to exist, because 00598 the LRU_old pointer is only allowed to differ by the 00599 tolerance value from strict 3/8 of the LRU list length. 
*/ 00600 00601 buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block); 00602 (buf_pool->LRU_old)->old = TRUE; 00603 00604 buf_pool->LRU_old_len++; 00605 ut_a(buf_pool->LRU_old); 00606 } 00607 00608 /* Remove the block from the LRU list */ 00609 UT_LIST_REMOVE(LRU, buf_pool->LRU, block); 00610 block->in_LRU_list = FALSE; 00611 00612 if (srv_use_awe && block->frame) { 00613 /* Remove from the list of mapped pages */ 00614 00615 UT_LIST_REMOVE(awe_LRU_free_mapped, 00616 buf_pool->awe_LRU_free_mapped, block); 00617 } 00618 00619 /* If the LRU list is so short that LRU_old not defined, return */ 00620 if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { 00621 00622 buf_pool->LRU_old = NULL; 00623 00624 return; 00625 } 00626 00627 ut_ad(buf_pool->LRU_old); 00628 00629 /* Update the LRU_old_len field if necessary */ 00630 if (block->old) { 00631 00632 buf_pool->LRU_old_len--; 00633 } 00634 00635 /* Adjust the length of the old block list if necessary */ 00636 buf_LRU_old_adjust_len(); 00637 } 00638 00639 /********************************************************************** 00640 Adds a block to the LRU list end. 
*/ 00641 UNIV_INLINE 00642 void 00643 buf_LRU_add_block_to_end_low( 00644 /*=========================*/ 00645 buf_block_t* block) /* in: control block */ 00646 { 00647 buf_block_t* last_block; 00648 00649 ut_ad(buf_pool); 00650 ut_ad(block); 00651 #ifdef UNIV_SYNC_DEBUG 00652 ut_ad(mutex_own(&(buf_pool->mutex))); 00653 #endif /* UNIV_SYNC_DEBUG */ 00654 00655 ut_a(block->state == BUF_BLOCK_FILE_PAGE); 00656 00657 block->old = TRUE; 00658 00659 last_block = UT_LIST_GET_LAST(buf_pool->LRU); 00660 00661 if (last_block) { 00662 block->LRU_position = last_block->LRU_position; 00663 } else { 00664 block->LRU_position = buf_pool_clock_tic(); 00665 } 00666 00667 ut_a(!block->in_LRU_list); 00668 UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block); 00669 block->in_LRU_list = TRUE; 00670 00671 if (srv_use_awe && block->frame) { 00672 /* Add to the list of mapped pages */ 00673 00674 UT_LIST_ADD_LAST(awe_LRU_free_mapped, 00675 buf_pool->awe_LRU_free_mapped, block); 00676 } 00677 00678 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { 00679 00680 buf_pool->LRU_old_len++; 00681 } 00682 00683 if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { 00684 00685 ut_ad(buf_pool->LRU_old); 00686 00687 /* Adjust the length of the old block list if necessary */ 00688 00689 buf_LRU_old_adjust_len(); 00690 00691 } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) { 00692 00693 /* The LRU list is now long enough for LRU_old to become 00694 defined: init it */ 00695 00696 buf_LRU_old_init(); 00697 } 00698 } 00699 00700 /********************************************************************** 00701 Adds a block to the LRU list. 
*/
UNIV_INLINE
void
buf_LRU_add_block_low(
/*==================*/
	buf_block_t*	block,	/* in: control block */
	ibool		old)	/* in: TRUE if should be put to the old blocks
				in the LRU list, else put to the start; if the
				LRU list is very short, the block is added to
				the start, regardless of this parameter */
{
	ulint	clock_now;
	ulint	lru_len;

	ut_ad(buf_pool);
	ut_ad(block);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&(buf_pool->mutex)));
#endif /* UNIV_SYNC_DEBUG */

	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
	ut_a(!block->in_LRU_list);

	block->old = old;

	/* The pool clock is advanced on every call, also when the value
	ends up unused because the block goes to the old segment */

	clock_now = buf_pool_clock_tic();

	if (srv_use_awe && block->frame) {
		/* Add to the list of mapped pages; for simplicity we always
		add to the start, even if the user would have set 'old'
		TRUE */

		UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
					buf_pool->awe_LRU_free_mapped, block);
	}

	if (old && UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {

		/* Insert right after the first old block */

		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
								block);
		buf_pool->LRU_old_len++;

		/* We copy the LRU position field of the previous block
		to the new block */

		block->LRU_position = (buf_pool->LRU_old)->LRU_position;
	} else {
		/* A young block, or a list too short to have an old
		segment: put the block to the start of the list */

		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);

		block->LRU_position = clock_now;
		block->freed_page_clock = buf_pool->freed_page_clock;
	}

	block->in_LRU_list = TRUE;

	lru_len = UT_LIST_GET_LEN(buf_pool->LRU);

	if (lru_len == BUF_LRU_OLD_MIN_LEN) {

		/* The LRU list is now long enough for LRU_old to become
		defined: init it */

		buf_LRU_old_init();

	} else if (lru_len > BUF_LRU_OLD_MIN_LEN) {

		ut_ad(buf_pool->LRU_old);

		/* Adjust the length of the old block list if necessary */

		buf_LRU_old_adjust_len();
	}
}
00770 00771 /********************************************************************** 00772 Adds a block to the LRU list. */ 00773 00774 void 00775 buf_LRU_add_block( 00776 /*==============*/ 00777 buf_block_t* block, /* in: control block */ 00778 ibool old) /* in: TRUE if should be put to the old 00779 blocks in the LRU list, else put to the start; 00780 if the LRU list is very short, the block is 00781 added to the start, regardless of this 00782 parameter */ 00783 { 00784 buf_LRU_add_block_low(block, old); 00785 } 00786 00787 /********************************************************************** 00788 Moves a block to the start of the LRU list. */ 00789 00790 void 00791 buf_LRU_make_block_young( 00792 /*=====================*/ 00793 buf_block_t* block) /* in: control block */ 00794 { 00795 buf_LRU_remove_block(block); 00796 buf_LRU_add_block_low(block, FALSE); 00797 } 00798 00799 /********************************************************************** 00800 Moves a block to the end of the LRU list. */ 00801 00802 void 00803 buf_LRU_make_block_old( 00804 /*===================*/ 00805 buf_block_t* block) /* in: control block */ 00806 { 00807 buf_LRU_remove_block(block); 00808 buf_LRU_add_block_to_end_low(block); 00809 } 00810 00811 /********************************************************************** 00812 Puts a block back to the free list. 
*/ 00813 00814 void 00815 buf_LRU_block_free_non_file_page( 00816 /*=============================*/ 00817 buf_block_t* block) /* in: block, must not contain a file page */ 00818 { 00819 #ifdef UNIV_SYNC_DEBUG 00820 ut_ad(mutex_own(&(buf_pool->mutex))); 00821 #endif /* UNIV_SYNC_DEBUG */ 00822 ut_ad(block); 00823 00824 ut_a((block->state == BUF_BLOCK_MEMORY) 00825 || (block->state == BUF_BLOCK_READY_FOR_USE)); 00826 00827 ut_a(block->n_pointers == 0); 00828 ut_a(!block->in_free_list); 00829 00830 block->state = BUF_BLOCK_NOT_USED; 00831 00832 #ifdef UNIV_DEBUG 00833 /* Wipe contents of page to reveal possible stale pointers to it */ 00834 memset(block->frame, '\0', UNIV_PAGE_SIZE); 00835 #endif 00836 UT_LIST_ADD_FIRST(free, buf_pool->free, block); 00837 block->in_free_list = TRUE; 00838 00839 if (srv_use_awe && block->frame) { 00840 /* Add to the list of mapped pages */ 00841 00842 UT_LIST_ADD_FIRST(awe_LRU_free_mapped, 00843 buf_pool->awe_LRU_free_mapped, block); 00844 } 00845 } 00846 00847 /********************************************************************** 00848 Takes a block out of the LRU list and page hash table and sets the block 00849 state to BUF_BLOCK_REMOVE_HASH. 
*/ 00850 static 00851 void 00852 buf_LRU_block_remove_hashed_page( 00853 /*=============================*/ 00854 buf_block_t* block) /* in: block, must contain a file page and 00855 be in a state where it can be freed; there 00856 may or may not be a hash index to the page */ 00857 { 00858 #ifdef UNIV_SYNC_DEBUG 00859 ut_ad(mutex_own(&(buf_pool->mutex))); 00860 #endif /* UNIV_SYNC_DEBUG */ 00861 ut_ad(block); 00862 00863 ut_a(block->state == BUF_BLOCK_FILE_PAGE); 00864 ut_a(block->io_fix == 0); 00865 ut_a(block->buf_fix_count == 0); 00866 ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0); 00867 00868 buf_LRU_remove_block(block); 00869 00870 buf_pool->freed_page_clock += 1; 00871 00872 /* Note that if AWE is enabled the block may not have a frame at all */ 00873 00874 buf_block_modify_clock_inc(block); 00875 00876 if (block != buf_page_hash_get(block->space, block->offset)) { 00877 fprintf(stderr, 00878 "InnoDB: Error: page %lu %lu not found from the hash table\n", 00879 (ulong) block->space, 00880 (ulong) block->offset); 00881 if (buf_page_hash_get(block->space, block->offset)) { 00882 fprintf(stderr, 00883 "InnoDB: From hash table we find block %p of %lu %lu which is not %p\n", 00884 (void*) buf_page_hash_get(block->space, block->offset), 00885 (ulong) buf_page_hash_get(block->space, block->offset)->space, 00886 (ulong) buf_page_hash_get(block->space, block->offset)->offset, 00887 (void*) block); 00888 } 00889 00890 #ifdef UNIV_DEBUG 00891 buf_print(); 00892 buf_LRU_print(); 00893 buf_validate(); 00894 buf_LRU_validate(); 00895 #endif 00896 ut_a(0); 00897 } 00898 00899 HASH_DELETE(buf_block_t, hash, buf_pool->page_hash, 00900 buf_page_address_fold(block->space, block->offset), 00901 block); 00902 00903 block->state = BUF_BLOCK_REMOVE_HASH; 00904 } 00905 00906 /********************************************************************** 00907 Puts a file page whose has no hash index to the free list. 
*/
static
void
buf_LRU_block_free_hashed_page(
/*===========================*/
	buf_block_t*	block)	/* in: block, must contain a file page and
				be in a state where it can be freed; its
				state must be BUF_BLOCK_REMOVE_HASH */
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&(buf_pool->mutex)));
#endif /* UNIV_SYNC_DEBUG */
	ut_a(block->state == BUF_BLOCK_REMOVE_HASH);

	/* Turn the block into a plain memory block, which is a state the
	non-file-page free function accepts */

	block->state = BUF_BLOCK_MEMORY;

	buf_LRU_block_free_non_file_page(block);
}

#ifdef UNIV_DEBUG
/**************************************************************************
Validates the LRU list and the free list. Every violated invariant fails
an assertion. */

ibool
buf_LRU_validate(void)
/*==================*/
				/* out: TRUE */
{
	buf_block_t*	block;
	buf_block_t*	next_block;
	ulint		n_old;
	ulint		target_len;

	ut_ad(buf_pool);
	mutex_enter(&(buf_pool->mutex));

	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {

		/* The old segment must be defined and its length must be
		within the tolerance of 3/8 of the list length */

		ut_a(buf_pool->LRU_old);

		n_old = buf_pool->LRU_old_len;
		target_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);

		ut_a(n_old >= target_len - BUF_LRU_OLD_TOLERANCE);
		ut_a(n_old <= target_len + BUF_LRU_OLD_TOLERANCE);
	}

	UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU);

	n_old = 0;

	for (block = UT_LIST_GET_FIRST(buf_pool->LRU);
	     block != NULL;
	     block = next_block) {

		ut_a(block->state == BUF_BLOCK_FILE_PAGE);

		if (block->old) {
			n_old++;
		}

		/* While exactly one old block has been counted, the
		current block must be the one LRU_old points to */

		if (buf_pool->LRU_old && (n_old == 1)) {
			ut_a(buf_pool->LRU_old == block);
		}

		next_block = UT_LIST_GET_NEXT(LRU, block);

		if (next_block) {
			/* If the following assert fails, it may
			not be an error: just the buf_pool clock
			has wrapped around */
			ut_a(block->LRU_position
			     >= next_block->LRU_position);
		}
	}

	if (buf_pool->LRU_old) {
		ut_a(buf_pool->LRU_old_len == n_old);
	}

	UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);

	for (block = UT_LIST_GET_FIRST(buf_pool->free);
	     block != NULL;
	     block = UT_LIST_GET_NEXT(free, block)) {

		ut_a(block->state == BUF_BLOCK_NOT_USED);
	}

	mutex_exit(&(buf_pool->mutex));
	return(TRUE);
}

/**************************************************************************
Prints the LRU list to stderr, ten blocks on each line. */

void
buf_LRU_print(void)
/*===============*/
{
	buf_block_t*	block;
	buf_frame_t*	frame;
	ulint		n_on_line	= 0;

	ut_ad(buf_pool);
	mutex_enter(&(buf_pool->mutex));

	fprintf(stderr, "Pool ulint clock %lu\n", (ulong) buf_pool->ulint_clock);

	for (block = UT_LIST_GET_FIRST(buf_pool->LRU);
	     block != NULL;
	     block = UT_LIST_GET_NEXT(LRU, block)) {

		fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);

		/* The attributes below are printed only when they are set */

		if (block->old) {
			fputs("old ", stderr);
		}

		if (block->buf_fix_count) {
			fprintf(stderr, "buffix count %lu ",
				(ulong) block->buf_fix_count);
		}

		if (block->io_fix) {
			fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix);
		}

		if (ut_dulint_cmp(block->oldest_modification,
				ut_dulint_zero) > 0) {
			fputs("modif. ", stderr);
		}

		frame = buf_block_get_frame(block);

		fprintf(stderr, "LRU pos %lu type %lu index id %lu ",
			(ulong) block->LRU_position,
			(ulong) fil_page_get_type(frame),
			(ulong) ut_dulint_get_low(btr_page_get_index_id(frame)));

		n_on_line++;

		if (n_on_line == 10) {
			n_on_line = 0;
			putc('\n', stderr);
		}
	}

	mutex_exit(&(buf_pool->mutex));
}
#endif /* UNIV_DEBUG */
1.4.7

