00001 /* Innobase relational database engine; Copyright (C) 2001 Innobase Oy 00002 00003 This program is free software; you can redistribute it and/or modify 00004 it under the terms of the GNU General Public License 2 00005 as published by the Free Software Foundation in June 1991. 00006 00007 This program is distributed in the hope that it will be useful, 00008 but WITHOUT ANY WARRANTY; without even the implied warranty of 00009 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00010 GNU General Public License for more details. 00011 00012 You should have received a copy of the GNU General Public License 2 00013 along with this program (in file COPYING); if not, write to the Free 00014 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ 00015 /****************************************************** 00016 The database buffer pool high-level routines 00017 00018 (c) 1995 Innobase Oy 00019 00020 Created 11/5/1995 Heikki Tuuri 00021 *******************************************************/ 00022 00023 #ifndef buf0buf_h 00024 #define buf0buf_h 00025 00026 #include "univ.i" 00027 #include "fil0fil.h" 00028 #include "mtr0types.h" 00029 #include "buf0types.h" 00030 #include "sync0rw.h" 00031 #include "hash0hash.h" 00032 #include "ut0byte.h" 00033 #include "os0proc.h" 00034 00035 /* Flags for flush types */ 00036 #define BUF_FLUSH_LRU 1 00037 #define BUF_FLUSH_SINGLE_PAGE 2 00038 #define BUF_FLUSH_LIST 3 /* An array in the pool struct 00039 has size BUF_FLUSH_LIST + 1: if you 00040 add more flush types, put them in 00041 the middle! 
*/ 00042 /* Modes for buf_page_get_gen */ 00043 #define BUF_GET 10 /* get always */ 00044 #define BUF_GET_IF_IN_POOL 11 /* get if in pool */ 00045 #define BUF_GET_NOWAIT 12 /* get if can set the latch without 00046 waiting */ 00047 #define BUF_GET_NO_LATCH 14 /* get and bufferfix, but set no latch; 00048 we have separated this case, because 00049 it is error-prone programming not to 00050 set a latch, and it should be used 00051 with care */ 00052 /* Modes for buf_page_get_known_nowait */ 00053 #define BUF_MAKE_YOUNG 51 00054 #define BUF_KEEP_OLD 52 00055 /* Magic value to use instead of checksums when they are disabled */ 00056 #define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL 00057 00058 extern buf_pool_t* buf_pool; /* The buffer pool of the database */ 00059 #ifdef UNIV_DEBUG 00060 extern ibool buf_debug_prints;/* If this is set TRUE, the program 00061 prints info whenever read or flush 00062 occurs */ 00063 #endif /* UNIV_DEBUG */ 00064 extern ulint srv_buf_pool_write_requests; /* variable to count write request 00065 issued */ 00066 00067 /************************************************************************ 00068 Creates the buffer pool. */ 00069 00070 buf_pool_t* 00071 buf_pool_init( 00072 /*==========*/ 00073 /* out, own: buf_pool object, NULL if not 00074 enough memory or error */ 00075 ulint max_size, /* in: maximum size of the buf_pool in 00076 blocks */ 00077 ulint curr_size, /* in: current size to use, must be <= 00078 max_size, currently must be equal to 00079 max_size */ 00080 ulint n_frames); /* in: number of frames; if AWE is used, 00081 this is the size of the address space window 00082 where physical memory pages are mapped; if 00083 AWE is not used then this must be the same 00084 as max_size */ 00085 /************************************************************************* 00086 Gets the current size of buffer buf_pool in bytes. In the case of AWE, the 00087 size of AWE window (= the frames). 
*/ 00088 UNIV_INLINE 00089 ulint 00090 buf_pool_get_curr_size(void); 00091 /*========================*/ 00092 /* out: size in bytes */ 00093 /************************************************************************* 00094 Gets the maximum size of buffer pool in bytes. In the case of AWE, the 00095 size of AWE window (= the frames). */ 00096 UNIV_INLINE 00097 ulint 00098 buf_pool_get_max_size(void); 00099 /*=======================*/ 00100 /* out: size in bytes */ 00101 /************************************************************************ 00102 Gets the smallest oldest_modification lsn for any page in the pool. Returns 00103 ut_dulint_zero if all modified pages have been flushed to disk. */ 00104 UNIV_INLINE 00105 dulint 00106 buf_pool_get_oldest_modification(void); 00107 /*==================================*/ 00108 /* out: oldest modification in pool, 00109 ut_dulint_zero if none */ 00110 /************************************************************************* 00111 Allocates a buffer frame. */ 00112 00113 buf_frame_t* 00114 buf_frame_alloc(void); 00115 /*==================*/ 00116 /* out: buffer frame */ 00117 /************************************************************************* 00118 Frees a buffer frame which does not contain a file page. */ 00119 00120 void 00121 buf_frame_free( 00122 /*===========*/ 00123 buf_frame_t* frame); /* in: buffer frame */ 00124 /************************************************************************* 00125 Copies contents of a buffer frame to a given buffer. */ 00126 UNIV_INLINE 00127 byte* 00128 buf_frame_copy( 00129 /*===========*/ 00130 /* out: buf */ 00131 byte* buf, /* in: buffer to copy to */ 00132 buf_frame_t* frame); /* in: buffer frame */ 00133 /****************************************************************** 00134 NOTE! The following macros should be used instead of buf_page_get_gen, 00135 to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed 00136 in LA! 
*/ 00137 #define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\ 00138 SP, OF, LA, NULL,\ 00139 BUF_GET, __FILE__, __LINE__, MTR) 00140 /****************************************************************** 00141 Use these macros to bufferfix a page with no latching. Remember not to 00142 read the contents of the page unless you know it is safe. Do not modify 00143 the contents of the page! We have separated this case, because it is 00144 error-prone programming not to set a latch, and it should be used 00145 with care. */ 00146 #define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\ 00147 SP, OF, RW_NO_LATCH, NULL,\ 00148 BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR) 00149 /****************************************************************** 00150 NOTE! The following macros should be used instead of buf_page_get_gen, to 00151 improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */ 00152 #define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\ 00153 SP, OF, LA, NULL,\ 00154 BUF_GET_NOWAIT, __FILE__, __LINE__, MTR) 00155 /****************************************************************** 00156 NOTE! The following macros should be used instead of 00157 buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and 00158 RW_X_LATCH are allowed as LA! */ 00159 #define buf_page_optimistic_get(LA, BL, G, MC, MTR) buf_page_optimistic_get_func(\ 00160 LA, BL, G, MC, __FILE__, __LINE__, MTR) 00161 /************************************************************************ 00162 This is the general function used to get optimistic access to a database 00163 page. 
*/ 00164 00165 ibool 00166 buf_page_optimistic_get_func( 00167 /*=========================*/ 00168 /* out: TRUE if success */ 00169 ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ 00170 buf_block_t* block, /* in: guessed block */ 00171 buf_frame_t* guess, /* in: guessed frame; note that AWE may move 00172 frames */ 00173 dulint modify_clock,/* in: modify clock value if mode is 00174 ..._GUESS_ON_CLOCK */ 00175 const char* file, /* in: file name */ 00176 ulint line, /* in: line where called */ 00177 mtr_t* mtr); /* in: mini-transaction */ 00178 /************************************************************************ 00179 Tries to get the page, but if file io is required, releases all latches 00180 in mtr down to the given savepoint. If io is required, this function 00181 retrieves the page to buffer buf_pool, but does not bufferfix it or latch 00182 it. */ 00183 UNIV_INLINE 00184 buf_frame_t* 00185 buf_page_get_release_on_io( 00186 /*=======================*/ 00187 /* out: pointer to the frame, or NULL 00188 if not in buffer buf_pool */ 00189 ulint space, /* in: space id */ 00190 ulint offset, /* in: offset of the page within space 00191 in units of a page */ 00192 buf_frame_t* guess, /* in: guessed frame or NULL */ 00193 ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH, 00194 or RW_NO_LATCH */ 00195 ulint savepoint, /* in: mtr savepoint */ 00196 mtr_t* mtr); /* in: mtr */ 00197 /************************************************************************ 00198 This is used to get access to a known database page, when no waiting can be 00199 done. 
*/ 00200 00201 ibool 00202 buf_page_get_known_nowait( 00203 /*======================*/ 00204 /* out: TRUE if success */ 00205 ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ 00206 buf_frame_t* guess, /* in: the known page frame */ 00207 ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ 00208 const char* file, /* in: file name */ 00209 ulint line, /* in: line where called */ 00210 mtr_t* mtr); /* in: mini-transaction */ 00211 /************************************************************************ 00212 This is the general function used to get access to a database page. */ 00213 00214 buf_frame_t* 00215 buf_page_get_gen( 00216 /*=============*/ 00217 /* out: pointer to the frame or NULL */ 00218 ulint space, /* in: space id */ 00219 ulint offset, /* in: page number */ 00220 ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ 00221 buf_frame_t* guess, /* in: guessed frame or NULL */ 00222 ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, 00223 BUF_GET_NOWAIT, BUF_GET_NO_LATCH */ 00224 const char* file, /* in: file name */ 00225 ulint line, /* in: line where called */ 00226 mtr_t* mtr); /* in: mini-transaction */ 00227 /************************************************************************ 00228 Initializes a page to the buffer buf_pool. The page is usually not read 00229 from a file even if it cannot be found in the buffer buf_pool. This is one 00230 of the functions which perform to a block a state transition NOT_USED => 00231 FILE_PAGE (the other is buf_page_init_for_read, declared further below). */ 00232 00233 buf_frame_t* 00234 buf_page_create( 00235 /*============*/ 00236 /* out: pointer to the frame, page bufferfixed */ 00237 ulint space, /* in: space id */ 00238 ulint offset, /* in: offset of the page within space in units of 00239 a page */ 00240 mtr_t* mtr); /* in: mini-transaction handle */ 00241 /************************************************************************ 00242 Inits a page to the buffer buf_pool, for use in ibbackup --restore. 
*/ 00243 00244 void 00245 buf_page_init_for_backup_restore( 00246 /*=============================*/ 00247 ulint space, /* in: space id */ 00248 ulint offset, /* in: offset of the page within space 00249 in units of a page */ 00250 buf_block_t* block); /* in: block to init */ 00251 /************************************************************************ 00252 Decrements the bufferfix count of a buffer control block and releases 00253 a latch, if specified. */ 00254 UNIV_INLINE 00255 void 00256 buf_page_release( 00257 /*=============*/ 00258 buf_block_t* block, /* in: buffer block */ 00259 ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH, 00260 RW_NO_LATCH */ 00261 mtr_t* mtr); /* in: mtr */ 00262 /************************************************************************ 00263 Moves a page to the start of the buffer pool LRU list. This high-level 00264 function can be used to prevent an important page from slipping out of 00265 the buffer pool. */ 00266 00267 void 00268 buf_page_make_young( 00269 /*================*/ 00270 buf_frame_t* frame); /* in: buffer frame of a file page */ 00271 /************************************************************************ 00272 Returns TRUE if the page can be found in the buffer pool hash table. NOTE 00273 that it is possible that the page is not yet read from disk, though. */ 00274 00275 ibool 00276 buf_page_peek( 00277 /*==========*/ 00278 /* out: TRUE if found from page hash table, 00279 NOTE that the page is not necessarily yet read 00280 from disk! */ 00281 ulint space, /* in: space id */ 00282 ulint offset);/* in: page number */ 00283 /************************************************************************ 00284 Returns the buffer control block if the page can be found in the buffer 00285 pool. NOTE that it is possible that the page is not yet read 00286 from disk, though. This is a very low-level function: use with care! 
*/ 00287 00288 buf_block_t* 00289 buf_page_peek_block( 00290 /*================*/ 00291 /* out: control block if found from page hash table, 00292 otherwise NULL; NOTE that the page is not necessarily 00293 yet read from disk! */ 00294 ulint space, /* in: space id */ 00295 ulint offset);/* in: page number */ 00296 /************************************************************************ 00297 Resets the check_index_page_at_flush field of a page if found in the buffer 00298 pool. */ 00299 00300 void 00301 buf_reset_check_index_page_at_flush( 00302 /*================================*/ 00303 ulint space, /* in: space id */ 00304 ulint offset);/* in: page number */ 00305 /************************************************************************ 00306 Sets file_page_was_freed TRUE if the page is found in the buffer pool. 00307 This function should be called when we free a file page and want the 00308 debug version to check that it is not accessed any more unless 00309 reallocated. */ 00310 00311 buf_block_t* 00312 buf_page_set_file_page_was_freed( 00313 /*=============================*/ 00314 /* out: control block if found from page hash table, 00315 otherwise NULL */ 00316 ulint space, /* in: space id */ 00317 ulint offset); /* in: page number */ 00318 /************************************************************************ 00319 Sets file_page_was_freed FALSE if the page is found in the buffer pool. 00320 This function should be called when we free a file page and want the 00321 debug version to check that it is not accessed any more unless 00322 reallocated. 
*/ 00323 00324 buf_block_t* 00325 buf_page_reset_file_page_was_freed( 00326 /*===============================*/ 00327 /* out: control block if found from page hash table, 00328 otherwise NULL */ 00329 ulint space, /* in: space id */ 00330 ulint offset); /* in: page number */ 00331 /************************************************************************ 00332 Recommends a move of a block to the start of the LRU list if there is danger 00333 of dropping from the buffer pool. NOTE: does not reserve the buffer pool 00334 mutex. */ 00335 UNIV_INLINE 00336 ibool 00337 buf_block_peek_if_too_old( 00338 /*======================*/ 00339 /* out: TRUE if should be made younger */ 00340 buf_block_t* block); /* in: block to make younger */ 00341 /************************************************************************ 00342 Returns the current state of is_hashed of a page. FALSE if the page is 00343 not in the pool. NOTE that this operation does not fix the page in the 00344 pool if it is found there. */ 00345 00346 ibool 00347 buf_page_peek_if_search_hashed( 00348 /*===========================*/ 00349 /* out: TRUE if page hash index is built in search 00350 system */ 00351 ulint space, /* in: space id */ 00352 ulint offset);/* in: page number */ 00353 /************************************************************************ 00354 Gets the youngest modification log sequence number for a frame. 00355 Returns zero if not file page or no modification occurred yet. */ 00356 UNIV_INLINE 00357 dulint 00358 buf_frame_get_newest_modification( 00359 /*==============================*/ 00360 /* out: newest modification to page */ 00361 buf_frame_t* frame); /* in: pointer to a frame */ 00362 /************************************************************************ 00363 Increments the modify clock of a frame by 1. The caller must (1) own the 00364 pool mutex and block bufferfix count has to be zero, (2) or own an x-lock 00365 on the block. 
*/ 00366 UNIV_INLINE 00367 dulint 00368 buf_frame_modify_clock_inc( 00369 /*=======================*/ 00370 /* out: new value */ 00371 buf_frame_t* frame); /* in: pointer to a frame */ 00372 /************************************************************************ 00373 Increments the modify clock of a block by 1. The caller must either (1) own 00374 the buf_pool mutex while the block bufferfix count is zero, or (2) own an 00375 x-lock on the block. */ 00376 UNIV_INLINE 00377 dulint 00378 buf_block_modify_clock_inc( 00379 /*=======================*/ 00380 /* out: new value */ 00381 buf_block_t* block); /* in: block */ 00382 /************************************************************************ 00383 Returns the value of the modify clock. The caller must have an s-lock 00384 or x-lock on the block. */ 00385 UNIV_INLINE 00386 dulint 00387 buf_block_get_modify_clock( 00388 /*=======================*/ 00389 /* out: value */ 00390 buf_block_t* block); /* in: block */ 00391 /************************************************************************ 00392 Calculates a page checksum which is stored to the page when it is written 00393 to a file. Note that we must be careful to calculate the same value 00394 on 32-bit and 64-bit architectures. */ 00395 00396 ulint 00397 buf_calc_page_new_checksum( 00398 /*=======================*/ 00399 /* out: checksum */ 00400 byte* page); /* in: buffer page */ 00401 /************************************************************************ 00402 In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only 00403 looked at the first few bytes of the page. This calculates that old 00404 checksum. 00405 NOTE: we must first store the new formula checksum to 00406 FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum 00407 because this takes that field as an input! 
*/ 00408 00409 ulint 00410 buf_calc_page_old_checksum( 00411 /*=======================*/ 00412 /* out: checksum */ 00413 byte* page); /* in: buffer page */ 00414 /************************************************************************ 00415 Checks if a page is corrupt. */ 00416 00417 ibool 00418 buf_page_is_corrupted( 00419 /*==================*/ 00420 /* out: TRUE if corrupted */ 00421 byte* read_buf); /* in: a database page */ 00422 /************************************************************************** 00423 Gets the page number of a pointer pointing within a buffer frame containing 00424 a file page. */ 00425 UNIV_INLINE 00426 ulint 00427 buf_frame_get_page_no( 00428 /*==================*/ 00429 /* out: page number */ 00430 byte* ptr); /* in: pointer to within a buffer frame */ 00431 /************************************************************************** 00432 Gets the space id of a pointer pointing within a buffer frame containing a 00433 file page. */ 00434 UNIV_INLINE 00435 ulint 00436 buf_frame_get_space_id( 00437 /*===================*/ 00438 /* out: space id */ 00439 byte* ptr); /* in: pointer to within a buffer frame */ 00440 /************************************************************************** 00441 Gets the space id, page offset, and byte offset within page of a 00442 pointer pointing to a buffer frame containing a file page. */ 00443 UNIV_INLINE 00444 void 00445 buf_ptr_get_fsp_addr( 00446 /*=================*/ 00447 byte* ptr, /* in: pointer to a buffer frame */ 00448 ulint* space, /* out: space id */ 00449 fil_addr_t* addr); /* out: page offset and byte offset */ 00450 /************************************************************************** 00451 Gets the hash value of the page the pointer is pointing to. This can be used 00452 in searches in the lock hash table. 
*/ 00453 UNIV_INLINE 00454 ulint 00455 buf_frame_get_lock_hash_val( 00456 /*========================*/ 00457 /* out: lock hash value */ 00458 byte* ptr); /* in: pointer to within a buffer frame */ 00459 /************************************************************************** 00460 Gets the mutex protecting the page record lock hash chain in the lock 00461 table. */ 00462 UNIV_INLINE 00463 mutex_t* 00464 buf_frame_get_lock_mutex( 00465 /*=====================*/ 00466 /* out: mutex */ 00467 byte* ptr); /* in: pointer to within a buffer frame */ 00468 /*********************************************************************** 00469 Gets the frame the pointer is pointing to. */ 00470 UNIV_INLINE 00471 buf_frame_t* 00472 buf_frame_align( 00473 /*============*/ 00474 /* out: pointer to frame */ 00475 byte* ptr); /* in: pointer to a frame */ 00476 /*********************************************************************** 00477 Checks if a pointer points to the block array of the buffer pool (blocks, not 00478 the frames). */ 00479 UNIV_INLINE 00480 ibool 00481 buf_pool_is_block( 00482 /*==============*/ 00483 /* out: TRUE if pointer to block */ 00484 void* ptr); /* in: pointer to memory */ 00485 #ifdef UNIV_DEBUG 00486 /************************************************************************* 00487 Validates the buffer pool data structure. */ 00488 00489 ibool 00490 buf_validate(void); 00491 /*==============*/ 00492 /************************************************************************* 00493 Prints info of the buffer pool data structure. */ 00494 00495 void 00496 buf_print(void); 00497 /*============*/ 00498 #endif /* UNIV_DEBUG */ 00499 /************************************************************************ 00500 Prints a page to stderr. 
*/ 00501 00502 void 00503 buf_page_print( 00504 /*===========*/ 00505 byte* read_buf); /* in: a database page */ 00506 /************************************************************************* 00507 Returns the number of latched pages in the buffer pool. */ 00508 00509 ulint 00510 buf_get_latched_pages_number(void); 00511 /*==============================*/ 00512 /************************************************************************* 00513 Returns the number of pending buf pool ios. */ 00514 00515 ulint 00516 buf_get_n_pending_ios(void); 00517 /*=======================*/ 00518 /************************************************************************* 00519 Prints info of the buffer i/o. */ 00520 00521 void 00522 buf_print_io( 00523 /*=========*/ 00524 FILE* file); /* in: file where to print */ 00525 /************************************************************************* 00526 Returns the ratio in percents of modified pages in the buffer pool / 00527 database pages in the buffer pool. */ 00528 00529 ulint 00530 buf_get_modified_ratio_pct(void); 00531 /*============================*/ 00532 /************************************************************************** 00533 Refreshes the statistics used to print per-second averages. */ 00534 00535 void 00536 buf_refresh_io_stats(void); 00537 /*======================*/ 00538 /************************************************************************* 00539 Checks that all file pages in the buffer are in a replaceable state. */ 00540 00541 ibool 00542 buf_all_freed(void); 00543 /*===============*/ 00544 /************************************************************************* 00545 Checks that there currently are no pending i/o-operations for the buffer 00546 pool. 
*/ 00547 00548 ibool 00549 buf_pool_check_no_pending_io(void); 00550 /*==============================*/ 00551 /* out: TRUE if there is no pending i/o */ 00552 /************************************************************************* 00553 Invalidates the file pages in the buffer pool when an archive recovery is 00554 completed. All the file pages buffered must be in a replaceable state when 00555 this function is called: not latched and not modified. */ 00556 00557 void 00558 buf_pool_invalidate(void); 00559 /*=====================*/ 00560 00561 /*======================================================================== 00562 --------------------------- LOWER LEVEL ROUTINES ------------------------- 00563 =========================================================================*/ 00564 00565 /************************************************************************ 00566 Maps the page of block to a frame, if not mapped yet. Unmaps some page 00567 from the end of the awe_LRU_free_mapped list. */ 00568 00569 void 00570 buf_awe_map_page_to_frame( 00571 /*======================*/ 00572 buf_block_t* block, /* in: block whose page should be 00573 mapped to a frame */ 00574 ibool add_to_mapped_list);/* in: TRUE if, in the case 00575 that the page has to be mapped, the 00576 block should also be added to the 00577 awe_LRU_free_mapped list */ 00578 #ifdef UNIV_SYNC_DEBUG 00579 /************************************************************************* 00580 Adds latch level info for the rw-lock protecting the buffer frame. This 00581 should be called in the debug version after a successful latching of a 00582 page if we know the latching order level of the acquired latch. 
*/ 00583 UNIV_INLINE 00584 void 00585 buf_page_dbg_add_level( 00586 /*===================*/ 00587 buf_frame_t* frame, /* in: buffer page where we have acquired 00588 a latch */ 00589 ulint level); /* in: latching order level */ 00590 #endif /* UNIV_SYNC_DEBUG */ 00591 /************************************************************************* 00592 Gets a pointer to the memory frame of a block. */ 00593 UNIV_INLINE 00594 buf_frame_t* 00595 buf_block_get_frame( 00596 /*================*/ 00597 /* out: pointer to the frame */ 00598 buf_block_t* block); /* in: pointer to the control block */ 00599 /************************************************************************* 00600 Gets the space id of a block. */ 00601 UNIV_INLINE 00602 ulint 00603 buf_block_get_space( 00604 /*================*/ 00605 /* out: space id */ 00606 buf_block_t* block); /* in: pointer to the control block */ 00607 /************************************************************************* 00608 Gets the page number of a block. */ 00609 UNIV_INLINE 00610 ulint 00611 buf_block_get_page_no( 00612 /*==================*/ 00613 /* out: page number */ 00614 buf_block_t* block); /* in: pointer to the control block */ 00615 /*********************************************************************** 00616 Gets the block to whose frame the pointer is pointing to. */ 00617 UNIV_INLINE 00618 buf_block_t* 00619 buf_block_align( 00620 /*============*/ 00621 /* out: pointer to block */ 00622 byte* ptr); /* in: pointer to a frame */ 00623 /************************************************************************ 00624 This function is used to get info if there is an io operation 00625 going on on a buffer page. */ 00626 UNIV_INLINE 00627 ibool 00628 buf_page_io_query( 00629 /*==============*/ 00630 /* out: TRUE if io going on */ 00631 buf_block_t* block); /* in: pool block, must be bufferfixed */ 00632 /*********************************************************************** 00633 Accessor function for block array. 
*/ 00634 UNIV_INLINE 00635 buf_block_t* 00636 buf_pool_get_nth_block( 00637 /*===================*/ 00638 /* out: pointer to block */ 00639 buf_pool_t* pool, /* in: pool */ 00640 ulint i); /* in: index of the block */ 00641 /************************************************************************ 00642 Function which inits a page for read to the buffer buf_pool. If the page is 00643 (1) already in buf_pool, or 00644 (2) if we specify to read only ibuf pages and the page is not an ibuf page, or 00645 (3) if the space is deleted or being deleted, 00646 then this function does nothing. 00647 Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock 00648 on the buffer frame. The io-handler must take care that the flag is cleared 00649 and the lock released later. This is one of the functions which perform the 00650 state transition NOT_USED => FILE_PAGE to a block (the other is 00651 buf_page_create). */ 00652 00653 buf_block_t* 00654 buf_page_init_for_read( 00655 /*===================*/ 00656 /* out: pointer to the block or NULL */ 00657 ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */ 00658 ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */ 00659 ulint space, /* in: space id */ 00660 ib_longlong tablespace_version,/* in: prevents reading from a wrong 00661 version of the tablespace in case we have done 00662 DISCARD + IMPORT */ 00663 ulint offset);/* in: page number */ 00664 /************************************************************************ 00665 Completes an asynchronous read or write request of a file page to or from 00666 the buffer pool. */ 00667 00668 void 00669 buf_page_io_complete( 00670 /*=================*/ 00671 buf_block_t* block); /* in: pointer to the block in question */ 00672 /************************************************************************ 00673 Calculates a folded value of a file page address to use in the page hash 00674 table. 
*/ 00675 UNIV_INLINE 00676 ulint 00677 buf_page_address_fold( 00678 /*==================*/ 00679 /* out: the folded value */ 00680 ulint space, /* in: space id */ 00681 ulint offset);/* in: offset of the page within space */ 00682 /********************************************************************** 00683 Returns the control block of a file page, NULL if not found. */ 00684 UNIV_INLINE 00685 buf_block_t* 00686 buf_page_hash_get( 00687 /*==============*/ 00688 /* out: block, NULL if not found */ 00689 ulint space, /* in: space id */ 00690 ulint offset);/* in: offset of the page within space */ 00691 /*********************************************************************** 00692 Increments the pool clock by one and returns its new value. Remember that 00693 in the 32 bit version the clock wraps around at 4 billion! */ 00694 UNIV_INLINE 00695 ulint 00696 buf_pool_clock_tic(void); 00697 /*====================*/ 00698 /* out: new clock value */ 00699 /************************************************************************* 00700 Gets the current length of the free list of buffer blocks. */ 00701 00702 ulint 00703 buf_get_free_list_len(void); 00704 /*=======================*/ 00705 00706 00707 00708 /* The buffer control block structure */ 00709 00710 struct buf_block_struct{ 00711 00712 /* 1. General fields */ 00713 00714 ulint magic_n; /* magic number to check */ 00715 ulint state; /* state of the control block: 00716 BUF_BLOCK_NOT_USED, ... 
*/ 00717 byte* frame; /* pointer to buffer frame which 00718 is of size UNIV_PAGE_SIZE, and 00719 aligned to an address divisible by 00720 UNIV_PAGE_SIZE; if AWE is used, this 00721 will be NULL for the pages which are 00722 currently not mapped into the virtual 00723 address space window of the buffer 00724 pool */ 00725 os_awe_t* awe_info; /* if AWE is used, then an array of 00726 awe page infos for 00727 UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE 00728 (normally = 4) physical memory 00729 pages; otherwise NULL */ 00730 ulint space; /* space id of the page */ 00731 ulint offset; /* page number within the space */ 00732 ulint lock_hash_val; /* hashed value of the page address 00733 in the record lock hash table */ 00734 mutex_t* lock_mutex; /* mutex protecting the chain in the 00735 record lock hash table */ 00736 rw_lock_t lock; /* read-write lock of the buffer 00737 frame */ 00738 buf_block_t* hash; /* node used in chaining to the page 00739 hash table */ 00740 ibool check_index_page_at_flush; 00741 /* TRUE if we know that this is 00742 an index page, and want the database 00743 to check its consistency before flush; 00744 note that there may be pages in the 00745 buffer pool which are index pages, 00746 but this flag is not set because 00747 we do not keep track of all pages */ 00748 /* 2. 
Page flushing fields */ 00749 00750 UT_LIST_NODE_T(buf_block_t) flush_list; 00751 /* node of the modified, not yet 00752 flushed blocks list */ 00753 dulint newest_modification; 00754 /* log sequence number of the youngest 00755 modification to this block, zero if 00756 not modified */ 00757 dulint oldest_modification; 00758 /* log sequence number of the START of 00759 the log entry written of the oldest 00760 modification to this block which has 00761 not yet been flushed on disk; zero if 00762 all modifications are on disk */ 00763 ulint flush_type; /* if this block is currently being 00764 flushed to disk, this tells the 00765 flush_type: BUF_FLUSH_LRU or 00766 BUF_FLUSH_LIST */ 00767 00768 /* 3. LRU replacement algorithm fields */ 00769 00770 UT_LIST_NODE_T(buf_block_t) free; 00771 /* node of the free block list */ 00772 ibool in_free_list; /* TRUE if in the free list; used in 00773 debugging */ 00774 UT_LIST_NODE_T(buf_block_t) LRU; 00775 /* node of the LRU list */ 00776 UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped; 00777 /* in the AWE version node in the 00778 list of free and LRU blocks which are 00779 mapped to a frame */ 00780 ibool in_LRU_list; /* TRUE if the page is in the LRU list; 00781 used in debugging */ 00782 ulint LRU_position; /* value which monotonically 00783 decreases (or may stay constant if 00784 the block is in the old blocks) toward 00785 the end of the LRU list, if the pool 00786 ulint_clock has not wrapped around: 00787 NOTE that this value can only be used 00788 in heuristic algorithms, because of 00789 the possibility of a wrap-around! 
*/ 00790 ulint freed_page_clock;/* the value of freed_page_clock 00791 of the buffer pool when this block was 00792 last put to the head of the 00793 LRU list */ 00794 ibool old; /* TRUE if the block is in the old 00795 blocks in the LRU list */ 00796 ibool accessed; /* TRUE if the page has been accessed 00797 while in the buffer pool: read-ahead 00798 may read in pages which have not been 00799 accessed yet */ 00800 ulint buf_fix_count; /* count of how manyfold this block 00801 is currently bufferfixed */ 00802 ulint io_fix; /* if a read is pending to the frame, 00803 io_fix is BUF_IO_READ, in the case 00804 of a write BUF_IO_WRITE, otherwise 0 */ 00805 /* 4. Optimistic search field */ 00806 00807 dulint modify_clock; /* this clock is incremented every 00808 time a pointer to a record on the 00809 page may become obsolete; this is 00810 used in the optimistic cursor 00811 positioning: if the modify clock has 00812 not changed, we know that the pointer 00813 is still valid; this field may be 00814 changed if the thread (1) owns the 00815 pool mutex and the page is not 00816 bufferfixed, or (2) the thread has an 00817 x-latch on the block */ 00818 00819 /* 5. Hash search fields: NOTE that the first 4 fields are NOT 00820 protected by any semaphore! */ 00821 00822 ulint n_hash_helps; /* counter which controls building 00823 of a new hash index for the page */ 00824 ulint n_fields; /* recommended prefix length for hash 00825 search: number of full fields */ 00826 ulint n_bytes; /* recommended prefix: number of bytes 00827 in an incomplete field */ 00828 ulint side; /* BTR_SEARCH_LEFT_SIDE or 00829 BTR_SEARCH_RIGHT_SIDE, depending on 00830 whether the leftmost record of several 00831 records with the same prefix should be 00832 indexed in the hash index */ 00833 00834 /* These 6 fields may only be modified when we have 00835 an x-latch on btr_search_latch AND 00836 a) we are holding an s-latch or x-latch on block->lock or 00837 b) we know that block->buf_fix_count == 0. 
00838 00839 An exception to this is when we init or create a page 00840 in the buffer pool in buf0buf.c. */ 00841 00842 ibool is_hashed; /* TRUE if hash index has already been 00843 built on this page; note that it does 00844 not guarantee that the index is 00845 complete, though: there may have been 00846 hash collisions, record deletions, 00847 etc. */ 00848 ulint n_pointers; /* used in debugging: the number of 00849 pointers in the adaptive hash index 00850 pointing to this frame */ 00851 ulint curr_n_fields; /* prefix length for hash indexing: 00852 number of full fields */ 00853 ulint curr_n_bytes; /* number of bytes in hash indexing */ 00854 ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or 00855 BTR_SEARCH_RIGHT_SIDE in hash 00856 indexing */ 00857 dict_index_t* index; /* Index for which the adaptive 00858 hash index has been created. */ 00859 /* 6. Debug fields */ 00860 #ifdef UNIV_SYNC_DEBUG 00861 rw_lock_t debug_latch; /* in the debug version, each thread 00862 which bufferfixes the block acquires 00863 an s-latch here; so we can use the 00864 debug utilities in sync0rw */ 00865 #endif 00866 ibool file_page_was_freed; 00867 /* this is set to TRUE when fsp 00868 frees a page in buffer pool */ 00869 }; 00870 00871 #define BUF_BLOCK_MAGIC_N 41526563 00872 00873 /* The buffer pool structure. NOTE! The definition appears here only for 00874 other modules of this directory (buf) to see it. Do not use from outside! */ 00875 00876 struct buf_pool_struct{ 00877 00878 /* 1. 
General fields */ 00879 00880 mutex_t mutex; /* mutex protecting the buffer pool 00881 struct and control blocks, except the 00882 read-write lock in them */ 00883 byte* frame_mem; /* pointer to the memory area which 00884 was allocated for the frames; in AWE 00885 this is the virtual address space 00886 window where we map pages stored 00887 in physical memory */ 00888 byte* frame_zero; /* pointer to the first buffer frame: 00889 this may differ from frame_mem, because 00890 this is aligned by the frame size */ 00891 byte* high_end; /* pointer to the end of the buffer 00892 frames */ 00893 ulint n_frames; /* number of frames */ 00894 buf_block_t* blocks; /* array of buffer control blocks */ 00895 buf_block_t** blocks_of_frames;/* inverse mapping which can be used 00896 to retrieve the buffer control block 00897 of a frame; this is an array which 00898 lists the blocks of frames in the 00899 order frame_zero, 00900 frame_zero + UNIV_PAGE_SIZE, ... 00901 a control block is always assigned 00902 for each frame, even if the frame does 00903 not contain any data; note that in AWE 00904 there are more control blocks than 00905 buffer frames */ 00906 os_awe_t* awe_info; /* if AWE is used, AWE info for the 00907 physical 4 kB memory pages associated 00908 with buffer frames */ 00909 ulint max_size; /* number of control blocks == 00910 maximum pool size in pages */ 00911 ulint curr_size; /* current pool size in pages; 00912 currently always the same as 00913 max_size */ 00914 hash_table_t* page_hash; /* hash table of the file pages */ 00915 00916 ulint n_pend_reads; /* number of pending read operations */ 00917 00918 time_t last_printout_time; /* when buf_print was last time 00919 called */ 00920 ulint n_pages_read; /* number of read operations */ 00921 ulint n_pages_written;/* number of write operations */ 00922 ulint n_pages_created;/* number of pages created in the pool 00923 with no read */ 00924 ulint n_page_gets; /* number of page gets performed; 00925 also successful 
searches through 00926 the adaptive hash index are 00927 counted as page gets; this field 00928 is NOT protected by the buffer 00929 pool mutex */ 00930 ulint n_pages_awe_remapped; /* if AWE is enabled, the 00931 number of remaps of blocks to 00932 buffer frames */ 00933 ulint n_page_gets_old;/* n_page_gets when buf_print was 00934 last time called: used to calculate 00935 hit rate */ 00936 ulint n_pages_read_old;/* n_pages_read when buf_print was 00937 last time called */ 00938 ulint n_pages_written_old;/* presumably n_pages_written when buf_print was last time called -- TODO confirm */ 00939 ulint n_pages_created_old;/* presumably n_pages_created when buf_print was 00940 last time called -- TODO confirm */ 00941 ulint n_pages_awe_remapped_old; /* presumably n_pages_awe_remapped when buf_print was last time called -- TODO confirm */ 00942 /* 2. Page flushing algorithm fields */ 00943 00944 UT_LIST_BASE_NODE_T(buf_block_t) flush_list; 00945 /* base node of the modified block 00946 list */ 00947 ibool init_flush[BUF_FLUSH_LIST + 1]; 00948 /* this is TRUE when a flush of the 00949 given type is being initialized; the array is indexed by flush type */ 00950 ulint n_flush[BUF_FLUSH_LIST + 1]; 00951 /* this is the number of pending 00952 writes in the given flush type; indexed by flush type */ 00953 os_event_t no_flush[BUF_FLUSH_LIST + 1]; 00954 /* this is in the set state when there 00955 is no flush batch of the given type 00956 running; indexed by flush type */ 00957 ulint ulint_clock; /* a sequence number used to count 00958 time. NOTE! This counter wraps 00959 around at 4 billion (if ulint == 00960 32 bits)! */ 00961 ulint freed_page_clock;/* a sequence number used to count the 00962 number of buffer blocks removed from 00963 the end of the LRU list; NOTE that 00964 this counter may wrap around at 4 00965 billion! */ 00966 ulint LRU_flush_ended;/* when an LRU flush ends for a page, 00967 this is incremented by one; this is 00968 set to zero when a buffer block is 00969 allocated */ 00970 00971 /* 3. 
LRU replacement algorithm fields */ 00972 00973 UT_LIST_BASE_NODE_T(buf_block_t) free; 00974 /* base node of the free block list; 00975 in the case of AWE, at the start are 00976 always free blocks for which the 00977 physical memory is mapped to a frame */ 00978 UT_LIST_BASE_NODE_T(buf_block_t) LRU; 00979 /* base node of the LRU list */ 00980 buf_block_t* LRU_old; /* pointer to the about 3/8 oldest 00981 blocks in the LRU list; NULL if LRU 00982 length less than BUF_LRU_OLD_MIN_LEN */ 00983 ulint LRU_old_len; /* length of the LRU list from 00984 the block to which LRU_old points 00985 onward, including that block; 00986 see buf0lru.c for the restrictions 00987 on this value; not defined if 00988 LRU_old == NULL */ 00989 UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped; 00990 /* list of those blocks which are 00991 in the LRU list or the free list, and 00992 where the page is mapped to a frame; 00993 thus, frames allocated, e.g., to the 00994 lock table, are not in this list */ 00995 }; 00996 00997 /* States of a control block */ 00998 #define BUF_BLOCK_NOT_USED 211 /* is in the free list */ 00999 #define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns 01000 a block, it is in this state */ 01001 #define BUF_BLOCK_FILE_PAGE 213 /* contains a buffered file page */ 01002 #define BUF_BLOCK_MEMORY 214 /* contains some main memory object */ 01003 #define BUF_BLOCK_REMOVE_HASH 215 /* hash index should be removed 01004 before putting to the free list */ 01005 01006 /* Io_fix states of a control block; these must be != 0 */ 01007 #define BUF_IO_READ 561 01008 #define BUF_IO_WRITE 562 01009 01010 /************************************************************************ 01011 Let us list the consistency conditions for different control block states. 
01012 01013 NOT_USED: is in free list, not in LRU list, not in flush list, nor 01014 page hash table 01015 READY_FOR_USE: is not in free list, LRU list, or flush list, nor page 01016 hash table 01017 MEMORY: is not in free list, LRU list, or flush list, nor page 01018 hash table 01019 FILE_PAGE: space and offset are defined, is in page hash table 01020 if io_fix == BUF_IO_WRITE, 01021 pool: no_flush[block->flush_type] is in reset state, 01022 pool: n_flush[block->flush_type] > 0 01023 01024 (1) if buf_fix_count == 0, then 01025 is in LRU list, not in free list 01026 is in flush list, 01027 if and only if oldest_modification > 0 01028 is x-locked, 01029 if and only if io_fix == BUF_IO_READ 01030 is s-locked, 01031 if and only if io_fix == BUF_IO_WRITE 01032 01033 (2) if buf_fix_count > 0, then 01034 is not in LRU list, not in free list 01035 is in flush list, 01036 if and only if oldest_modification > 0 01037 if io_fix == BUF_IO_READ, 01038 is x-locked 01039 if io_fix == BUF_IO_WRITE, 01040 is s-locked 01041 01042 State transitions: 01043 01044 NOT_USED => READY_FOR_USE 01045 READY_FOR_USE => MEMORY 01046 READY_FOR_USE => FILE_PAGE 01047 MEMORY => NOT_USED 01048 FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if 01049 (1) buf_fix_count == 0, 01050 (2) oldest_modification == 0, and 01051 (3) io_fix == 0. 01052 */ 01053 01054 #ifndef UNIV_NONINL 01055 #include "buf0buf.ic" 01056 #endif 01057 01058 #endif
1.4.7

