00001 /****************************************************** 00002 Database log 00003 00004 (c) 1995-1997 Innobase Oy 00005 00006 Created 12/9/1995 Heikki Tuuri 00007 *******************************************************/ 00008 00009 #include "log0log.h" 00010 00011 #ifdef UNIV_NONINL 00012 #include "log0log.ic" 00013 #endif 00014 00015 #include "mem0mem.h" 00016 #include "buf0buf.h" 00017 #include "buf0flu.h" 00018 #include "srv0srv.h" 00019 #include "log0recv.h" 00020 #include "fil0fil.h" 00021 #include "dict0boot.h" 00022 #include "srv0srv.h" 00023 #include "srv0start.h" 00024 #include "trx0sys.h" 00025 #include "trx0trx.h" 00026 00027 /* 00028 General philosophy of InnoDB redo-logs: 00029 00030 1) Every change to the contents of a data page must be done 00031 through mtr, which in mtr_commit() writes log records 00032 to the InnoDB redo log. 00033 00034 2) Normally these changes are performed using mlog_write_ulint() 00035 or a similar function. 00036 00037 3) In some page level operations only a code number of a 00038 c-function and its parameters are written to the log to 00039 reduce the size of the log. 00040 00041 3a) You should not add parameters to these kinds of functions 00042 (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse()) 00043 00044 3b) You should not add functionality which either changes 00045 behavior when compared with the old or is dependent on data 00046 outside of the page. These kinds of functions should implement 00047 a self-contained page transformation, and it should be left unchanged 00048 unless you have very essential reasons to change log 00049 semantics or format. 
00050 00051 */ 00052 00053 /* Current free limit of space 0; protected by the log sys mutex; 0 means 00054 uninitialized */ 00055 ulint log_fsp_current_free_limit = 0; 00056 00057 /* Global log system variable */ 00058 log_t* log_sys = NULL; 00059 00060 #ifdef UNIV_DEBUG 00061 ibool log_do_write = TRUE; 00062 00063 ibool log_debug_writes = FALSE; 00064 #endif /* UNIV_DEBUG */ 00065 00066 /* These control how often we print warnings if the last checkpoint is too 00067 old */ 00068 ibool log_has_printed_chkp_warning = FALSE; 00069 time_t log_last_warning_time; 00070 00071 #ifdef UNIV_LOG_ARCHIVE 00072 /* Pointer to this variable is used as the i/o-message when we do i/o to an 00073 archive */ 00074 byte log_archive_io; 00075 #endif /* UNIV_LOG_ARCHIVE */ 00076 00077 /* A margin for free space in the log buffer before a log entry is catenated */ 00078 #define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE) 00079 00080 /* Margins for free space in the log buffer after a log entry is catenated */ 00081 #define LOG_BUF_FLUSH_RATIO 2 00082 #define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE) 00083 00084 /* Margin for the free space in the smallest log group, before a new query 00085 step which modifies the database, is started */ 00086 00087 #define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE) 00088 #define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE) 00089 00090 /* This parameter controls asynchronous making of a new checkpoint; the value 00091 should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */ 00092 00093 #define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32 00094 00095 /* This parameter controls synchronous preflushing of modified buffer pages */ 00096 #define LOG_POOL_PREFLUSH_RATIO_SYNC 16 00097 00098 /* The same ratio for asynchronous preflushing; this value should be less than 00099 the previous */ 00100 #define LOG_POOL_PREFLUSH_RATIO_ASYNC 8 00101 00102 /* Extra margin, in addition to one log file, used in archiving */ 00103 #define 
LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE) 00104 00105 /* This parameter controls asynchronous writing to the archive */ 00106 #define LOG_ARCHIVE_RATIO_ASYNC 16 00107 00108 /* Codes used in unlocking flush latches */ 00109 #define LOG_UNLOCK_NONE_FLUSHED_LOCK 1 00110 #define LOG_UNLOCK_FLUSH_LOCK 2 00111 00112 /* States of an archiving operation */ 00113 #define LOG_ARCHIVE_READ 1 00114 #define LOG_ARCHIVE_WRITE 2 00115 00116 /********************************************************** 00117 Completes a checkpoint write i/o to a log file. */ 00118 static 00119 void 00120 log_io_complete_checkpoint(void); 00121 /*============================*/ 00122 #ifdef UNIV_LOG_ARCHIVE 00123 /********************************************************** 00124 Completes an archiving i/o. */ 00125 static 00126 void 00127 log_io_complete_archive(void); 00128 /*=========================*/ 00129 #endif /* UNIV_LOG_ARCHIVE */ 00130 00131 /******************************************************************** 00132 Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint, 00133 so that we know that the limit has been written to a log checkpoint field 00134 on disk. */ 00135 00136 void 00137 log_fsp_current_free_limit_set_and_checkpoint( 00138 /*==========================================*/ 00139 ulint limit) /* in: limit to set */ 00140 { 00141 ibool success; 00142 00143 mutex_enter(&(log_sys->mutex)); 00144 00145 log_fsp_current_free_limit = limit; 00146 00147 mutex_exit(&(log_sys->mutex)); 00148 00149 /* Try to make a synchronous checkpoint */ 00150 00151 success = FALSE; 00152 00153 while (!success) { 00154 success = log_checkpoint(TRUE, TRUE); 00155 } 00156 } 00157 00158 /******************************************************************** 00159 Returns the oldest modified block lsn in the pool, or log_sys->lsn if none 00160 exists. 
*/ 00161 static 00162 dulint 00163 log_buf_pool_get_oldest_modification(void) 00164 /*======================================*/ 00165 { 00166 dulint lsn; 00167 00168 #ifdef UNIV_SYNC_DEBUG 00169 ut_ad(mutex_own(&(log_sys->mutex))); 00170 #endif /* UNIV_SYNC_DEBUG */ 00171 00172 lsn = buf_pool_get_oldest_modification(); 00173 00174 if (ut_dulint_is_zero(lsn)) { 00175 00176 lsn = log_sys->lsn; 00177 } 00178 00179 return(lsn); 00180 } 00181 00182 /**************************************************************** 00183 Opens the log for log_write_low. The log must be closed with log_close and 00184 released with log_release. */ 00185 00186 dulint 00187 log_reserve_and_open( 00188 /*=================*/ 00189 /* out: start lsn of the log record */ 00190 ulint len) /* in: length of data to be catenated */ 00191 { 00192 log_t* log = log_sys; 00193 ulint len_upper_limit; 00194 #ifdef UNIV_LOG_ARCHIVE 00195 ulint archived_lsn_age; 00196 ulint dummy; 00197 #endif /* UNIV_LOG_ARCHIVE */ 00198 #ifdef UNIV_DEBUG 00199 ulint count = 0; 00200 #endif /* UNIV_DEBUG */ 00201 00202 ut_a(len < log->buf_size / 2); 00203 loop: 00204 mutex_enter(&(log->mutex)); 00205 00206 /* Calculate an upper limit for the space the string may take in the 00207 log buffer */ 00208 00209 len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4; 00210 00211 if (log->buf_free + len_upper_limit > log->buf_size) { 00212 00213 mutex_exit(&(log->mutex)); 00214 00215 /* Not enough free space, do a syncronous flush of the log 00216 buffer */ 00217 00218 log_buffer_flush_to_disk(); 00219 00220 srv_log_waits++; 00221 00222 ut_ad(++count < 50); 00223 00224 goto loop; 00225 } 00226 00227 #ifdef UNIV_LOG_ARCHIVE 00228 if (log->archiving_state != LOG_ARCH_OFF) { 00229 00230 archived_lsn_age = ut_dulint_minus(log->lsn, 00231 log->archived_lsn); 00232 if (archived_lsn_age + len_upper_limit 00233 > log->max_archived_lsn_age) { 00234 /* Not enough free archived space in log groups: do a 00235 synchronous archive write batch: 
*/ 00236 00237 mutex_exit(&(log->mutex)); 00238 00239 ut_ad(len_upper_limit <= log->max_archived_lsn_age); 00240 00241 log_archive_do(TRUE, &dummy); 00242 00243 ut_ad(++count < 50); 00244 00245 goto loop; 00246 } 00247 } 00248 #endif /* UNIV_LOG_ARCHIVE */ 00249 00250 #ifdef UNIV_LOG_DEBUG 00251 log->old_buf_free = log->buf_free; 00252 log->old_lsn = log->lsn; 00253 #endif 00254 return(log->lsn); 00255 } 00256 00257 /**************************************************************** 00258 Writes to the log the string given. It is assumed that the caller holds the 00259 log mutex. */ 00260 00261 void 00262 log_write_low( 00263 /*==========*/ 00264 byte* str, /* in: string */ 00265 ulint str_len) /* in: string length */ 00266 { 00267 log_t* log = log_sys; 00268 ulint len; 00269 ulint data_len; 00270 byte* log_block; 00271 00272 #ifdef UNIV_SYNC_DEBUG 00273 ut_ad(mutex_own(&(log->mutex))); 00274 #endif /* UNIV_SYNC_DEBUG */ 00275 part_loop: 00276 /* Calculate a part length */ 00277 00278 data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len; 00279 00280 if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { 00281 00282 /* The string fits within the current log block */ 00283 00284 len = str_len; 00285 } else { 00286 data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; 00287 00288 len = OS_FILE_LOG_BLOCK_SIZE 00289 - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) 00290 - LOG_BLOCK_TRL_SIZE; 00291 } 00292 00293 ut_memcpy(log->buf + log->buf_free, str, len); 00294 00295 str_len -= len; 00296 str = str + len; 00297 00298 log_block = ut_align_down(log->buf + log->buf_free, 00299 OS_FILE_LOG_BLOCK_SIZE); 00300 log_block_set_data_len(log_block, data_len); 00301 00302 if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { 00303 /* This block became full */ 00304 log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE); 00305 log_block_set_checkpoint_no(log_block, 00306 log_sys->next_checkpoint_no); 00307 len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE; 
00308 00309 log->lsn = ut_dulint_add(log->lsn, len); 00310 00311 /* Initialize the next block header */ 00312 log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn); 00313 } else { 00314 log->lsn = ut_dulint_add(log->lsn, len); 00315 } 00316 00317 log->buf_free += len; 00318 00319 ut_ad(log->buf_free <= log->buf_size); 00320 00321 if (str_len > 0) { 00322 goto part_loop; 00323 } 00324 00325 srv_log_write_requests++; 00326 } 00327 00328 /**************************************************************** 00329 Closes the log. */ 00330 00331 dulint 00332 log_close(void) 00333 /*===========*/ 00334 /* out: lsn */ 00335 { 00336 byte* log_block; 00337 ulint first_rec_group; 00338 dulint oldest_lsn; 00339 dulint lsn; 00340 log_t* log = log_sys; 00341 ulint checkpoint_age; 00342 00343 #ifdef UNIV_SYNC_DEBUG 00344 ut_ad(mutex_own(&(log->mutex))); 00345 #endif /* UNIV_SYNC_DEBUG */ 00346 00347 lsn = log->lsn; 00348 00349 log_block = ut_align_down(log->buf + log->buf_free, 00350 OS_FILE_LOG_BLOCK_SIZE); 00351 first_rec_group = log_block_get_first_rec_group(log_block); 00352 00353 if (first_rec_group == 0) { 00354 /* We initialized a new log block which was not written 00355 full by the current mtr: the next mtr log record group 00356 will start within this block at the offset data_len */ 00357 00358 log_block_set_first_rec_group(log_block, 00359 log_block_get_data_len(log_block)); 00360 } 00361 00362 if (log->buf_free > log->max_buf_free) { 00363 00364 log->check_flush_or_checkpoint = TRUE; 00365 } 00366 00367 checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn); 00368 00369 if (checkpoint_age >= log->log_group_capacity) { 00370 /* TODO: split btr_store_big_rec_extern_fields() into small 00371 steps so that we can release all latches in the middle, and 00372 call log_free_check() to ensure we never write over log written 00373 after the latest checkpoint. In principle, we should split all 00374 big_rec operations, but other operations are smaller. 
*/ 00375 00376 if (!log_has_printed_chkp_warning 00377 || difftime(time(NULL), log_last_warning_time) > 15) { 00378 00379 log_has_printed_chkp_warning = TRUE; 00380 log_last_warning_time = time(NULL); 00381 00382 ut_print_timestamp(stderr); 00383 fprintf(stderr, 00384 " InnoDB: ERROR: the age of the last checkpoint is %lu,\n" 00385 "InnoDB: which exceeds the log group capacity %lu.\n" 00386 "InnoDB: If you are using big BLOB or TEXT rows, you must set the\n" 00387 "InnoDB: combined size of log files at least 10 times bigger than the\n" 00388 "InnoDB: largest such row.\n", 00389 (ulong) checkpoint_age, 00390 (ulong) log->log_group_capacity); 00391 } 00392 } 00393 00394 if (checkpoint_age <= log->max_modified_age_async) { 00395 00396 goto function_exit; 00397 } 00398 00399 oldest_lsn = buf_pool_get_oldest_modification(); 00400 00401 if (ut_dulint_is_zero(oldest_lsn) 00402 || (ut_dulint_minus(lsn, oldest_lsn) 00403 > log->max_modified_age_async) 00404 || checkpoint_age > log->max_checkpoint_age_async) { 00405 00406 log->check_flush_or_checkpoint = TRUE; 00407 } 00408 function_exit: 00409 00410 #ifdef UNIV_LOG_DEBUG 00411 log_check_log_recs(log->buf + log->old_buf_free, 00412 log->buf_free - log->old_buf_free, log->old_lsn); 00413 #endif 00414 00415 return(lsn); 00416 } 00417 00418 #ifdef UNIV_LOG_ARCHIVE 00419 /********************************************************** 00420 Pads the current log block full with dummy log records. Used in producing 00421 consistent archived log files. 
*/ 00422 static 00423 void 00424 log_pad_current_log_block(void) 00425 /*===========================*/ 00426 { 00427 byte b = MLOG_DUMMY_RECORD; 00428 ulint pad_length; 00429 ulint i; 00430 dulint lsn; 00431 00432 /* We retrieve lsn only because otherwise gcc crashed on HP-UX */ 00433 lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE); 00434 00435 pad_length = OS_FILE_LOG_BLOCK_SIZE 00436 - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE) 00437 - LOG_BLOCK_TRL_SIZE; 00438 00439 for (i = 0; i < pad_length; i++) { 00440 log_write_low(&b, 1); 00441 } 00442 00443 lsn = log_sys->lsn; 00444 00445 log_close(); 00446 log_release(); 00447 00448 ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE) 00449 == LOG_BLOCK_HDR_SIZE); 00450 } 00451 #endif /* UNIV_LOG_ARCHIVE */ 00452 00453 /********************************************************** 00454 Calculates the data capacity of a log group, when the log file headers are not 00455 included. */ 00456 00457 ulint 00458 log_group_get_capacity( 00459 /*===================*/ 00460 /* out: capacity in bytes */ 00461 log_group_t* group) /* in: log group */ 00462 { 00463 #ifdef UNIV_SYNC_DEBUG 00464 ut_ad(mutex_own(&(log_sys->mutex))); 00465 #endif /* UNIV_SYNC_DEBUG */ 00466 00467 return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files); 00468 } 00469 00470 /********************************************************** 00471 Calculates the offset within a log group, when the log file headers are not 00472 included. 
*/ 00473 UNIV_INLINE 00474 ulint 00475 log_group_calc_size_offset( 00476 /*=======================*/ 00477 /* out: size offset (<= offset) */ 00478 ulint offset, /* in: real offset within the log group */ 00479 log_group_t* group) /* in: log group */ 00480 { 00481 #ifdef UNIV_SYNC_DEBUG 00482 ut_ad(mutex_own(&(log_sys->mutex))); 00483 #endif /* UNIV_SYNC_DEBUG */ 00484 00485 return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size)); 00486 } 00487 00488 /********************************************************** 00489 Calculates the offset within a log group, when the log file headers are 00490 included. */ 00491 UNIV_INLINE 00492 ulint 00493 log_group_calc_real_offset( 00494 /*=======================*/ 00495 /* out: real offset (>= offset) */ 00496 ulint offset, /* in: size offset within the log group */ 00497 log_group_t* group) /* in: log group */ 00498 { 00499 #ifdef UNIV_SYNC_DEBUG 00500 ut_ad(mutex_own(&(log_sys->mutex))); 00501 #endif /* UNIV_SYNC_DEBUG */ 00502 00503 return(offset + LOG_FILE_HDR_SIZE 00504 * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE))); 00505 } 00506 00507 /********************************************************** 00508 Calculates the offset of an lsn within a log group. */ 00509 static 00510 ulint 00511 log_group_calc_lsn_offset( 00512 /*======================*/ 00513 /* out: offset within the log group */ 00514 dulint lsn, /* in: lsn, must be within 4 GB of 00515 group->lsn */ 00516 log_group_t* group) /* in: log group */ 00517 { 00518 dulint gr_lsn; 00519 ib_longlong gr_lsn_size_offset; 00520 ib_longlong difference; 00521 ib_longlong group_size; 00522 ib_longlong offset; 00523 00524 #ifdef UNIV_SYNC_DEBUG 00525 ut_ad(mutex_own(&(log_sys->mutex))); 00526 #endif /* UNIV_SYNC_DEBUG */ 00527 00528 /* If total log file size is > 2 GB we can easily get overflows 00529 with 32-bit integers. Use 64-bit integers instead. 
*/ 00530 00531 gr_lsn = group->lsn; 00532 00533 gr_lsn_size_offset = (ib_longlong) 00534 log_group_calc_size_offset(group->lsn_offset, group); 00535 00536 group_size = (ib_longlong) log_group_get_capacity(group); 00537 00538 if (ut_dulint_cmp(lsn, gr_lsn) >= 0) { 00539 00540 difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn); 00541 } else { 00542 difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn); 00543 00544 difference = difference % group_size; 00545 00546 difference = group_size - difference; 00547 } 00548 00549 offset = (gr_lsn_size_offset + difference) % group_size; 00550 00551 ut_a(offset < (((ib_longlong) 1) << 32)); /* offset must be < 4 GB */ 00552 00553 /* fprintf(stderr, 00554 "Offset is %lu gr_lsn_offset is %lu difference is %lu\n", 00555 (ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference); 00556 */ 00557 00558 return(log_group_calc_real_offset((ulint)offset, group)); 00559 } 00560 00561 /*********************************************************************** 00562 Calculates where in log files we find a specified lsn. 
*/ 00563 00564 ulint 00565 log_calc_where_lsn_is( 00566 /*==================*/ 00567 /* out: log file number */ 00568 ib_longlong* log_file_offset, /* out: offset in that file 00569 (including the header) */ 00570 dulint first_header_lsn, /* in: first log file start 00571 lsn */ 00572 dulint lsn, /* in: lsn whose position to 00573 determine */ 00574 ulint n_log_files, /* in: total number of log 00575 files */ 00576 ib_longlong log_file_size) /* in: log file size 00577 (including the header) */ 00578 { 00579 ib_longlong ib_lsn; 00580 ib_longlong ib_first_header_lsn; 00581 ib_longlong capacity = log_file_size - LOG_FILE_HDR_SIZE; 00582 ulint file_no; 00583 ib_longlong add_this_many; 00584 00585 ib_lsn = ut_conv_dulint_to_longlong(lsn); 00586 ib_first_header_lsn = ut_conv_dulint_to_longlong(first_header_lsn); 00587 00588 if (ib_lsn < ib_first_header_lsn) { 00589 add_this_many = 1 + (ib_first_header_lsn - ib_lsn) 00590 / (capacity * (ib_longlong)n_log_files); 00591 ib_lsn += add_this_many 00592 * capacity * (ib_longlong)n_log_files; 00593 } 00594 00595 ut_a(ib_lsn >= ib_first_header_lsn); 00596 00597 file_no = ((ulint)((ib_lsn - ib_first_header_lsn) / capacity)) 00598 % n_log_files; 00599 *log_file_offset = (ib_lsn - ib_first_header_lsn) % capacity; 00600 00601 *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE; 00602 00603 return(file_no); 00604 } 00605 00606 /************************************************************ 00607 Sets the field values in group to correspond to a given lsn. For this function 00608 to work, the values must already be correctly initialized to correspond to 00609 some lsn, for instance, a checkpoint lsn. 
*/ 00610 00611 void 00612 log_group_set_fields( 00613 /*=================*/ 00614 log_group_t* group, /* in: group */ 00615 dulint lsn) /* in: lsn for which the values should be 00616 set */ 00617 { 00618 group->lsn_offset = log_group_calc_lsn_offset(lsn, group); 00619 group->lsn = lsn; 00620 } 00621 00622 /********************************************************************* 00623 Calculates the recommended highest values for lsn - last_checkpoint_lsn, 00624 lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. */ 00625 static 00626 ibool 00627 log_calc_max_ages(void) 00628 /*===================*/ 00629 /* out: error value FALSE if the smallest log group is 00630 too small to accommodate the number of OS threads in 00631 the database server */ 00632 { 00633 log_group_t* group; 00634 ulint margin; 00635 ulint free; 00636 ibool success = TRUE; 00637 ulint smallest_capacity; 00638 ulint archive_margin; 00639 ulint smallest_archive_margin; 00640 00641 #ifdef UNIV_SYNC_DEBUG 00642 ut_ad(!mutex_own(&(log_sys->mutex))); 00643 #endif /* UNIV_SYNC_DEBUG */ 00644 00645 mutex_enter(&(log_sys->mutex)); 00646 00647 group = UT_LIST_GET_FIRST(log_sys->log_groups); 00648 00649 ut_ad(group); 00650 00651 smallest_capacity = ULINT_MAX; 00652 smallest_archive_margin = ULINT_MAX; 00653 00654 while (group) { 00655 if (log_group_get_capacity(group) < smallest_capacity) { 00656 00657 smallest_capacity = log_group_get_capacity(group); 00658 } 00659 00660 archive_margin = log_group_get_capacity(group) 00661 - (group->file_size - LOG_FILE_HDR_SIZE) 00662 - LOG_ARCHIVE_EXTRA_MARGIN; 00663 00664 if (archive_margin < smallest_archive_margin) { 00665 00666 smallest_archive_margin = archive_margin; 00667 } 00668 00669 group = UT_LIST_GET_NEXT(log_groups, group); 00670 } 00671 00672 /* Add extra safety */ 00673 smallest_capacity = smallest_capacity - smallest_capacity / 10; 00674 00675 /* For each OS thread we must reserve so much free space in the 00676 smallest log group that it 
can accommodate the log entries produced 00677 by single query steps: running out of free log space is a serious 00678 system error which requires rebooting the database. */ 00679 00680 free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency) 00681 + LOG_CHECKPOINT_EXTRA_FREE; 00682 if (free >= smallest_capacity / 2) { 00683 success = FALSE; 00684 00685 goto failure; 00686 } else { 00687 margin = smallest_capacity - free; 00688 } 00689 00690 margin = ut_min(margin, log_sys->adm_checkpoint_interval); 00691 00692 margin = margin - margin / 10; /* Add still some extra safety */ 00693 00694 log_sys->log_group_capacity = smallest_capacity; 00695 00696 log_sys->max_modified_age_async = margin 00697 - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC; 00698 log_sys->max_modified_age_sync = margin 00699 - margin / LOG_POOL_PREFLUSH_RATIO_SYNC; 00700 00701 log_sys->max_checkpoint_age_async = margin - margin 00702 / LOG_POOL_CHECKPOINT_RATIO_ASYNC; 00703 log_sys->max_checkpoint_age = margin; 00704 00705 #ifdef UNIV_LOG_ARCHIVE 00706 log_sys->max_archived_lsn_age = smallest_archive_margin; 00707 00708 log_sys->max_archived_lsn_age_async = smallest_archive_margin 00709 - smallest_archive_margin / 00710 LOG_ARCHIVE_RATIO_ASYNC; 00711 #endif /* UNIV_LOG_ARCHIVE */ 00712 failure: 00713 mutex_exit(&(log_sys->mutex)); 00714 00715 if (!success) { 00716 fprintf(stderr, 00717 "InnoDB: Error: ib_logfiles are too small for innodb_thread_concurrency %lu.\n" 00718 "InnoDB: The combined size of ib_logfiles should be bigger than\n" 00719 "InnoDB: 200 kB * innodb_thread_concurrency.\n" 00720 "InnoDB: To get mysqld to start up, set innodb_thread_concurrency in my.cnf\n" 00721 "InnoDB: to a lower value, for example, to 8. After an ERROR-FREE shutdown\n" 00722 "InnoDB: of mysqld you can adjust the size of ib_logfiles, as explained in\n" 00723 "InnoDB: http://dev.mysql.com/doc/mysql/en/Adding_and_removing.html\n" 00724 "InnoDB: Cannot continue operation. 
Calling exit(1).\n", 00725 (ulong)srv_thread_concurrency); 00726 00727 exit(1); 00728 } 00729 00730 return(success); 00731 } 00732 00733 /********************************************************** 00734 Initializes the log. */ 00735 00736 void 00737 log_init(void) 00738 /*==========*/ 00739 { 00740 byte* buf; 00741 00742 log_sys = mem_alloc(sizeof(log_t)); 00743 00744 mutex_create(&log_sys->mutex, SYNC_LOG); 00745 00746 mutex_enter(&(log_sys->mutex)); 00747 00748 /* Start the lsn from one log block from zero: this way every 00749 log record has a start lsn != zero, a fact which we will use */ 00750 00751 log_sys->lsn = LOG_START_LSN; 00752 00753 ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE); 00754 ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE); 00755 00756 buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE); 00757 log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE); 00758 00759 log_sys->buf_size = LOG_BUFFER_SIZE; 00760 00761 memset(log_sys->buf, '\0', LOG_BUFFER_SIZE); 00762 00763 log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO 00764 - LOG_BUF_FLUSH_MARGIN; 00765 log_sys->check_flush_or_checkpoint = TRUE; 00766 UT_LIST_INIT(log_sys->log_groups); 00767 00768 log_sys->n_log_ios = 0; 00769 00770 log_sys->n_log_ios_old = log_sys->n_log_ios; 00771 log_sys->last_printout_time = time(NULL); 00772 /*----------------------------*/ 00773 00774 log_sys->buf_next_to_write = 0; 00775 00776 log_sys->write_lsn = ut_dulint_zero; 00777 log_sys->current_flush_lsn = ut_dulint_zero; 00778 log_sys->flushed_to_disk_lsn = ut_dulint_zero; 00779 00780 log_sys->written_to_some_lsn = log_sys->lsn; 00781 log_sys->written_to_all_lsn = log_sys->lsn; 00782 00783 log_sys->n_pending_writes = 0; 00784 00785 log_sys->no_flush_event = os_event_create(NULL); 00786 00787 os_event_set(log_sys->no_flush_event); 00788 00789 log_sys->one_flushed_event = os_event_create(NULL); 00790 00791 os_event_set(log_sys->one_flushed_event); 00792 00793 /*----------------------------*/ 00794 
log_sys->adm_checkpoint_interval = ULINT_MAX; 00795 00796 log_sys->next_checkpoint_no = ut_dulint_zero; 00797 log_sys->last_checkpoint_lsn = log_sys->lsn; 00798 log_sys->n_pending_checkpoint_writes = 0; 00799 00800 rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK); 00801 00802 log_sys->checkpoint_buf = ut_align( 00803 mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), 00804 OS_FILE_LOG_BLOCK_SIZE); 00805 memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE); 00806 /*----------------------------*/ 00807 00808 #ifdef UNIV_LOG_ARCHIVE 00809 /* Under MySQL, log archiving is always off */ 00810 log_sys->archiving_state = LOG_ARCH_OFF; 00811 log_sys->archived_lsn = log_sys->lsn; 00812 log_sys->next_archived_lsn = ut_dulint_zero; 00813 00814 log_sys->n_pending_archive_ios = 0; 00815 00816 rw_lock_create(&log_sys->archive_lock, SYNC_NO_ORDER_CHECK); 00817 00818 log_sys->archive_buf = NULL; 00819 00820 /* ut_align( 00821 ut_malloc(LOG_ARCHIVE_BUF_SIZE 00822 + OS_FILE_LOG_BLOCK_SIZE), 00823 OS_FILE_LOG_BLOCK_SIZE); */ 00824 log_sys->archive_buf_size = 0; 00825 00826 /* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */ 00827 00828 log_sys->archiving_on = os_event_create(NULL); 00829 #endif /* UNIV_LOG_ARCHIVE */ 00830 00831 /*----------------------------*/ 00832 00833 log_block_init(log_sys->buf, log_sys->lsn); 00834 log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); 00835 00836 log_sys->buf_free = LOG_BLOCK_HDR_SIZE; 00837 log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE); 00838 00839 mutex_exit(&(log_sys->mutex)); 00840 00841 #ifdef UNIV_LOG_DEBUG 00842 recv_sys_create(); 00843 recv_sys_init(FALSE, buf_pool_get_curr_size()); 00844 00845 recv_sys->parse_start_lsn = log_sys->lsn; 00846 recv_sys->scanned_lsn = log_sys->lsn; 00847 recv_sys->scanned_checkpoint_no = 0; 00848 recv_sys->recovered_lsn = log_sys->lsn; 00849 recv_sys->limit_lsn = ut_dulint_max; 00850 #endif 00851 } 00852 00853 
/********************************************************************** 00854 Inits a log group to the log system. */ 00855 00856 void 00857 log_group_init( 00858 /*===========*/ 00859 ulint id, /* in: group id */ 00860 ulint n_files, /* in: number of log files */ 00861 ulint file_size, /* in: log file size in bytes */ 00862 ulint space_id, /* in: space id of the file space 00863 which contains the log files of this 00864 group */ 00865 ulint archive_space_id __attribute__((unused))) 00866 /* in: space id of the file space 00867 which contains some archived log 00868 files for this group; currently, only 00869 for the first log group this is 00870 used */ 00871 { 00872 ulint i; 00873 00874 log_group_t* group; 00875 00876 group = mem_alloc(sizeof(log_group_t)); 00877 00878 group->id = id; 00879 group->n_files = n_files; 00880 group->file_size = file_size; 00881 group->space_id = space_id; 00882 group->state = LOG_GROUP_OK; 00883 group->lsn = LOG_START_LSN; 00884 group->lsn_offset = LOG_FILE_HDR_SIZE; 00885 group->n_pending_writes = 0; 00886 00887 group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files); 00888 #ifdef UNIV_LOG_ARCHIVE 00889 group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files); 00890 #endif /* UNIV_LOG_ARCHIVE */ 00891 00892 for (i = 0; i < n_files; i++) { 00893 *(group->file_header_bufs + i) = ut_align( 00894 mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE), 00895 OS_FILE_LOG_BLOCK_SIZE); 00896 00897 memset(*(group->file_header_bufs + i), '\0', 00898 LOG_FILE_HDR_SIZE); 00899 00900 #ifdef UNIV_LOG_ARCHIVE 00901 *(group->archive_file_header_bufs + i) = ut_align( 00902 mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE), 00903 OS_FILE_LOG_BLOCK_SIZE); 00904 memset(*(group->archive_file_header_bufs + i), '\0', 00905 LOG_FILE_HDR_SIZE); 00906 #endif /* UNIV_LOG_ARCHIVE */ 00907 } 00908 00909 #ifdef UNIV_LOG_ARCHIVE 00910 group->archive_space_id = archive_space_id; 00911 00912 group->archived_file_no = 0; 00913 
group->archived_offset = 0; 00914 #endif /* UNIV_LOG_ARCHIVE */ 00915 00916 group->checkpoint_buf = ut_align( 00917 mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), 00918 OS_FILE_LOG_BLOCK_SIZE); 00919 00920 memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE); 00921 00922 UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group); 00923 00924 ut_a(log_calc_max_ages()); 00925 } 00926 00927 /********************************************************************** 00928 Does the unlockings needed in flush i/o completion. */ 00929 UNIV_INLINE 00930 void 00931 log_flush_do_unlocks( 00932 /*=================*/ 00933 ulint code) /* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK 00934 and LOG_UNLOCK_NONE_FLUSHED_LOCK */ 00935 { 00936 #ifdef UNIV_SYNC_DEBUG 00937 ut_ad(mutex_own(&(log_sys->mutex))); 00938 #endif /* UNIV_SYNC_DEBUG */ 00939 00940 /* NOTE that we must own the log mutex when doing the setting of the 00941 events: this is because transactions will wait for these events to 00942 be set, and at that moment the log flush they were waiting for must 00943 have ended. If the log mutex were not reserved here, the i/o-thread 00944 calling this function might be preempted for a while, and when it 00945 resumed execution, it might be that a new flush had been started, and 00946 this function would erroneously signal the NEW flush as completed. 00947 Thus, the changes in the state of these events are performed 00948 atomically in conjunction with the changes in the state of 00949 log_sys->n_pending_writes etc. */ 00950 00951 if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) { 00952 os_event_set(log_sys->one_flushed_event); 00953 } 00954 00955 if (code & LOG_UNLOCK_FLUSH_LOCK) { 00956 os_event_set(log_sys->no_flush_event); 00957 } 00958 } 00959 00960 /********************************************************************** 00961 Checks if a flush is completed for a log group and does the completion 00962 routine if yes. 
*/ 00963 UNIV_INLINE 00964 ulint 00965 log_group_check_flush_completion( 00966 /*=============================*/ 00967 /* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */ 00968 log_group_t* group) /* in: log group */ 00969 { 00970 #ifdef UNIV_SYNC_DEBUG 00971 ut_ad(mutex_own(&(log_sys->mutex))); 00972 #endif /* UNIV_SYNC_DEBUG */ 00973 00974 if (!log_sys->one_flushed && group->n_pending_writes == 0) { 00975 #ifdef UNIV_DEBUG 00976 if (log_debug_writes) { 00977 fprintf(stderr, 00978 "Log flushed first to group %lu\n", (ulong) group->id); 00979 } 00980 #endif /* UNIV_DEBUG */ 00981 log_sys->written_to_some_lsn = log_sys->write_lsn; 00982 log_sys->one_flushed = TRUE; 00983 00984 return(LOG_UNLOCK_NONE_FLUSHED_LOCK); 00985 } 00986 00987 #ifdef UNIV_DEBUG 00988 if (log_debug_writes && (group->n_pending_writes == 0)) { 00989 00990 fprintf(stderr, "Log flushed to group %lu\n", (ulong) group->id); 00991 } 00992 #endif /* UNIV_DEBUG */ 00993 return(0); 00994 } 00995 00996 /********************************************************** 00997 Checks if a flush is completed and does the completion routine if yes. 
*/ 00998 static 00999 ulint 01000 log_sys_check_flush_completion(void) 01001 /*================================*/ 01002 /* out: LOG_UNLOCK_FLUSH_LOCK or 0 */ 01003 { 01004 ulint move_start; 01005 ulint move_end; 01006 01007 #ifdef UNIV_SYNC_DEBUG 01008 ut_ad(mutex_own(&(log_sys->mutex))); 01009 #endif /* UNIV_SYNC_DEBUG */ 01010 01011 if (log_sys->n_pending_writes == 0) { 01012 01013 log_sys->written_to_all_lsn = log_sys->write_lsn; 01014 log_sys->buf_next_to_write = log_sys->write_end_offset; 01015 01016 if (log_sys->write_end_offset > log_sys->max_buf_free / 2) { 01017 /* Move the log buffer content to the start of the 01018 buffer */ 01019 01020 move_start = ut_calc_align_down( 01021 log_sys->write_end_offset, 01022 OS_FILE_LOG_BLOCK_SIZE); 01023 move_end = ut_calc_align(log_sys->buf_free, 01024 OS_FILE_LOG_BLOCK_SIZE); 01025 01026 ut_memmove(log_sys->buf, log_sys->buf + move_start, 01027 move_end - move_start); 01028 log_sys->buf_free -= move_start; 01029 01030 log_sys->buf_next_to_write -= move_start; 01031 } 01032 01033 return(LOG_UNLOCK_FLUSH_LOCK); 01034 } 01035 01036 return(0); 01037 } 01038 01039 /********************************************************** 01040 Completes an i/o to a log file. 
*/ 01041 01042 void 01043 log_io_complete( 01044 /*============*/ 01045 log_group_t* group) /* in: log group or a dummy pointer */ 01046 { 01047 ulint unlock; 01048 01049 #ifdef UNIV_LOG_ARCHIVE 01050 if ((byte*)group == &log_archive_io) { 01051 /* It was an archive write */ 01052 01053 log_io_complete_archive(); 01054 01055 return; 01056 } 01057 #endif /* UNIV_LOG_ARCHIVE */ 01058 01059 if ((ulint)group & 0x1UL) { 01060 /* It was a checkpoint write */ 01061 group = (log_group_t*)((ulint)group - 1); 01062 01063 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC 01064 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { 01065 01066 fil_flush(group->space_id); 01067 } 01068 01069 #ifdef UNIV_DEBUG 01070 if (log_debug_writes) { 01071 fprintf(stderr, 01072 "Checkpoint info written to group %lu\n", 01073 group->id); 01074 } 01075 #endif /* UNIV_DEBUG */ 01076 log_io_complete_checkpoint(); 01077 01078 return; 01079 } 01080 01081 ut_error; /* We currently use synchronous writing of the 01082 logs and cannot end up here! */ 01083 01084 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC 01085 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC 01086 && srv_flush_log_at_trx_commit != 2) { 01087 01088 fil_flush(group->space_id); 01089 } 01090 01091 mutex_enter(&(log_sys->mutex)); 01092 01093 ut_a(group->n_pending_writes > 0); 01094 ut_a(log_sys->n_pending_writes > 0); 01095 01096 group->n_pending_writes--; 01097 log_sys->n_pending_writes--; 01098 01099 unlock = log_group_check_flush_completion(group); 01100 unlock = unlock | log_sys_check_flush_completion(); 01101 01102 log_flush_do_unlocks(unlock); 01103 01104 mutex_exit(&(log_sys->mutex)); 01105 } 01106 01107 /********************************************************** 01108 Writes a log file header to a log file space. 
*/ 01109 static 01110 void 01111 log_group_file_header_flush( 01112 /*========================*/ 01113 log_group_t* group, /* in: log group */ 01114 ulint nth_file, /* in: header to the nth file in the 01115 log file space */ 01116 dulint start_lsn) /* in: log file data starts at this 01117 lsn */ 01118 { 01119 byte* buf; 01120 ulint dest_offset; 01121 #ifdef UNIV_SYNC_DEBUG 01122 ut_ad(mutex_own(&(log_sys->mutex))); 01123 #endif /* UNIV_SYNC_DEBUG */ 01124 01125 ut_a(nth_file < group->n_files); 01126 01127 buf = *(group->file_header_bufs + nth_file); 01128 01129 mach_write_to_4(buf + LOG_GROUP_ID, group->id); 01130 mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn); 01131 01132 /* Wipe over possible label of ibbackup --restore */ 01133 memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4); 01134 01135 dest_offset = nth_file * group->file_size; 01136 01137 #ifdef UNIV_DEBUG 01138 if (log_debug_writes) { 01139 fprintf(stderr, 01140 "Writing log file header to group %lu file %lu\n", 01141 (ulong) group->id, (ulong) nth_file); 01142 } 01143 #endif /* UNIV_DEBUG */ 01144 if (log_do_write) { 01145 log_sys->n_log_ios++; 01146 01147 srv_os_log_pending_writes++; 01148 01149 fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 01150 dest_offset / UNIV_PAGE_SIZE, 01151 dest_offset % UNIV_PAGE_SIZE, 01152 OS_FILE_LOG_BLOCK_SIZE, 01153 buf, group); 01154 01155 srv_os_log_pending_writes--; 01156 } 01157 } 01158 01159 /********************************************************** 01160 Stores a 4-byte checksum to the trailer checksum field of a log block 01161 before writing it to a log file. This checksum is used in recovery to 01162 check the consistency of a log block. 
*/ 01163 static 01164 void 01165 log_block_store_checksum( 01166 /*=====================*/ 01167 byte* block) /* in/out: pointer to a log block */ 01168 { 01169 log_block_set_checksum(block, log_block_calc_checksum(block)); 01170 } 01171 01172 /********************************************************** 01173 Writes a buffer to a log file group. */ 01174 01175 void 01176 log_group_write_buf( 01177 /*================*/ 01178 log_group_t* group, /* in: log group */ 01179 byte* buf, /* in: buffer */ 01180 ulint len, /* in: buffer len; must be divisible 01181 by OS_FILE_LOG_BLOCK_SIZE */ 01182 dulint start_lsn, /* in: start lsn of the buffer; must 01183 be divisible by 01184 OS_FILE_LOG_BLOCK_SIZE */ 01185 ulint new_data_offset)/* in: start offset of new data in 01186 buf: this parameter is used to decide 01187 if we have to write a new log file 01188 header */ 01189 { 01190 ulint write_len; 01191 ibool write_header; 01192 ulint next_offset; 01193 ulint i; 01194 01195 #ifdef UNIV_SYNC_DEBUG 01196 ut_ad(mutex_own(&(log_sys->mutex))); 01197 #endif /* UNIV_SYNC_DEBUG */ 01198 ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0); 01199 ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); 01200 01201 if (new_data_offset == 0) { 01202 write_header = TRUE; 01203 } else { 01204 write_header = FALSE; 01205 } 01206 loop: 01207 if (len == 0) { 01208 01209 return; 01210 } 01211 01212 next_offset = log_group_calc_lsn_offset(start_lsn, group); 01213 01214 if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE) 01215 && write_header) { 01216 /* We start to write a new log file instance in the group */ 01217 01218 log_group_file_header_flush(group, 01219 next_offset / group->file_size, start_lsn); 01220 srv_os_log_written+= OS_FILE_LOG_BLOCK_SIZE; 01221 srv_log_writes++; 01222 } 01223 01224 if ((next_offset % group->file_size) + len > group->file_size) { 01225 01226 write_len = group->file_size 01227 - (next_offset % group->file_size); 01228 } else { 01229 write_len = len; 01230 } 
01231 01232 #ifdef UNIV_DEBUG 01233 if (log_debug_writes) { 01234 01235 fprintf(stderr, 01236 "Writing log file segment to group %lu offset %lu len %lu\n" 01237 "start lsn %lu %lu\n" 01238 "First block n:o %lu last block n:o %lu\n", 01239 (ulong) group->id, (ulong) next_offset, 01240 (ulong) write_len, 01241 (ulong) ut_dulint_get_high(start_lsn), 01242 (ulong) ut_dulint_get_low(start_lsn), 01243 (ulong) log_block_get_hdr_no(buf), 01244 (ulong) log_block_get_hdr_no( 01245 buf + write_len - OS_FILE_LOG_BLOCK_SIZE)); 01246 ut_a(log_block_get_hdr_no(buf) 01247 == log_block_convert_lsn_to_no(start_lsn)); 01248 01249 for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) { 01250 01251 ut_a(log_block_get_hdr_no(buf) + i 01252 == log_block_get_hdr_no(buf 01253 + i * OS_FILE_LOG_BLOCK_SIZE)); 01254 } 01255 } 01256 #endif /* UNIV_DEBUG */ 01257 /* Calculate the checksums for each log block and write them to 01258 the trailer fields of the log blocks */ 01259 01260 for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) { 01261 log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE); 01262 } 01263 01264 if (log_do_write) { 01265 log_sys->n_log_ios++; 01266 01267 srv_os_log_pending_writes++; 01268 01269 fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 01270 next_offset / UNIV_PAGE_SIZE, 01271 next_offset % UNIV_PAGE_SIZE, write_len, buf, group); 01272 01273 srv_os_log_pending_writes--; 01274 01275 srv_os_log_written+= write_len; 01276 srv_log_writes++; 01277 } 01278 01279 if (write_len < len) { 01280 start_lsn = ut_dulint_add(start_lsn, write_len); 01281 len -= write_len; 01282 buf += write_len; 01283 01284 write_header = TRUE; 01285 01286 goto loop; 01287 } 01288 } 01289 01290 /********************************************************** 01291 This function is called, e.g., when a transaction wants to commit. It checks 01292 that the log has been written to the log file up to the last log entry written 01293 by the transaction. 
If there is a flush running, it waits and checks if the 01294 flush flushed enough. If not, starts a new flush. */ 01295 01296 void 01297 log_write_up_to( 01298 /*============*/ 01299 dulint lsn, /* in: log sequence number up to which the log should 01300 be written, ut_dulint_max if not specified */ 01301 ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, 01302 or LOG_WAIT_ALL_GROUPS */ 01303 ibool flush_to_disk) 01304 /* in: TRUE if we want the written log also to be 01305 flushed to disk */ 01306 { 01307 log_group_t* group; 01308 ulint start_offset; 01309 ulint end_offset; 01310 ulint area_start; 01311 ulint area_end; 01312 ulint loop_count; 01313 ulint unlock; 01314 01315 if (recv_no_ibuf_operations) { 01316 /* Recovery is running and no operations on the log files are 01317 allowed yet (the variable name .._no_ibuf_.. is misleading) */ 01318 01319 return; 01320 } 01321 01322 loop_count = 0; 01323 loop: 01324 loop_count++; 01325 01326 ut_ad(loop_count < 5); 01327 01328 if (loop_count > 2) { 01329 /* fprintf(stderr, "Log loop count %lu\n", loop_count); */ 01330 } 01331 01332 mutex_enter(&(log_sys->mutex)); 01333 01334 if (flush_to_disk 01335 && ut_dulint_cmp(log_sys->flushed_to_disk_lsn, lsn) >= 0) { 01336 01337 mutex_exit(&(log_sys->mutex)); 01338 01339 return; 01340 } 01341 01342 if (!flush_to_disk 01343 && (ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0 01344 || (ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) 01345 >= 0 01346 && wait != LOG_WAIT_ALL_GROUPS))) { 01347 01348 mutex_exit(&(log_sys->mutex)); 01349 01350 return; 01351 } 01352 01353 if (log_sys->n_pending_writes > 0) { 01354 /* A write (+ possibly flush to disk) is running */ 01355 01356 if (flush_to_disk 01357 && ut_dulint_cmp(log_sys->current_flush_lsn, lsn) 01358 >= 0) { 01359 /* The write + flush will write enough: wait for it to 01360 complete */ 01361 01362 goto do_waits; 01363 } 01364 01365 if (!flush_to_disk 01366 && ut_dulint_cmp(log_sys->write_lsn, lsn) >= 0) { 01367 /* The write 
will write enough: wait for it to 01368 complete */ 01369 01370 goto do_waits; 01371 } 01372 01373 mutex_exit(&(log_sys->mutex)); 01374 01375 /* Wait for the write to complete and try to start a new 01376 write */ 01377 01378 os_event_wait(log_sys->no_flush_event); 01379 01380 goto loop; 01381 } 01382 01383 if (!flush_to_disk 01384 && log_sys->buf_free == log_sys->buf_next_to_write) { 01385 /* Nothing to write and no flush to disk requested */ 01386 01387 mutex_exit(&(log_sys->mutex)); 01388 01389 return; 01390 } 01391 01392 #ifdef UNIV_DEBUG 01393 if (log_debug_writes) { 01394 fprintf(stderr, 01395 "Writing log from %lu %lu up to lsn %lu %lu\n", 01396 (ulong) ut_dulint_get_high(log_sys->written_to_all_lsn), 01397 (ulong) ut_dulint_get_low(log_sys->written_to_all_lsn), 01398 (ulong) ut_dulint_get_high(log_sys->lsn), 01399 (ulong) ut_dulint_get_low(log_sys->lsn)); 01400 } 01401 #endif /* UNIV_DEBUG */ 01402 log_sys->n_pending_writes++; 01403 01404 group = UT_LIST_GET_FIRST(log_sys->log_groups); 01405 group->n_pending_writes++; /* We assume here that we have only 01406 one log group! 
*/ 01407 01408 os_event_reset(log_sys->no_flush_event); 01409 os_event_reset(log_sys->one_flushed_event); 01410 01411 start_offset = log_sys->buf_next_to_write; 01412 end_offset = log_sys->buf_free; 01413 01414 area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE); 01415 area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE); 01416 01417 ut_ad(area_end - area_start > 0); 01418 01419 log_sys->write_lsn = log_sys->lsn; 01420 01421 if (flush_to_disk) { 01422 log_sys->current_flush_lsn = log_sys->lsn; 01423 } 01424 01425 log_sys->one_flushed = FALSE; 01426 01427 log_block_set_flush_bit(log_sys->buf + area_start, TRUE); 01428 log_block_set_checkpoint_no( 01429 log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, 01430 log_sys->next_checkpoint_no); 01431 01432 /* Copy the last, incompletely written, log block a log block length 01433 up, so that when the flush operation writes from the log buffer, the 01434 segment to write will not be changed by writers to the log */ 01435 01436 ut_memcpy(log_sys->buf + area_end, 01437 log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, 01438 OS_FILE_LOG_BLOCK_SIZE); 01439 01440 log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE; 01441 log_sys->write_end_offset = log_sys->buf_free; 01442 01443 group = UT_LIST_GET_FIRST(log_sys->log_groups); 01444 01445 /* Do the write to the log files */ 01446 01447 while (group) { 01448 log_group_write_buf(group, 01449 log_sys->buf + area_start, 01450 area_end - area_start, 01451 ut_dulint_align_down(log_sys->written_to_all_lsn, 01452 OS_FILE_LOG_BLOCK_SIZE), 01453 start_offset - area_start); 01454 01455 log_group_set_fields(group, log_sys->write_lsn); 01456 01457 group = UT_LIST_GET_NEXT(log_groups, group); 01458 } 01459 01460 mutex_exit(&(log_sys->mutex)); 01461 01462 if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { 01463 /* O_DSYNC means the OS did not buffer the log file at all: 01464 so we have also flushed to disk what we have written */ 01465 01466 log_sys->flushed_to_disk_lsn = 
log_sys->write_lsn; 01467 01468 } else if (flush_to_disk) { 01469 01470 group = UT_LIST_GET_FIRST(log_sys->log_groups); 01471 01472 fil_flush(group->space_id); 01473 log_sys->flushed_to_disk_lsn = log_sys->write_lsn; 01474 } 01475 01476 mutex_enter(&(log_sys->mutex)); 01477 01478 group = UT_LIST_GET_FIRST(log_sys->log_groups); 01479 01480 ut_a(group->n_pending_writes == 1); 01481 ut_a(log_sys->n_pending_writes == 1); 01482 01483 group->n_pending_writes--; 01484 log_sys->n_pending_writes--; 01485 01486 unlock = log_group_check_flush_completion(group); 01487 unlock = unlock | log_sys_check_flush_completion(); 01488 01489 log_flush_do_unlocks(unlock); 01490 01491 mutex_exit(&(log_sys->mutex)); 01492 01493 return; 01494 01495 do_waits: 01496 mutex_exit(&(log_sys->mutex)); 01497 01498 if (wait == LOG_WAIT_ONE_GROUP) { 01499 os_event_wait(log_sys->one_flushed_event); 01500 } else if (wait == LOG_WAIT_ALL_GROUPS) { 01501 os_event_wait(log_sys->no_flush_event); 01502 } else { 01503 ut_ad(wait == LOG_NO_WAIT); 01504 } 01505 } 01506 01507 /******************************************************************** 01508 Does a syncronous flush of the log buffer to disk. */ 01509 01510 void 01511 log_buffer_flush_to_disk(void) 01512 /*==========================*/ 01513 { 01514 dulint lsn; 01515 01516 mutex_enter(&(log_sys->mutex)); 01517 01518 lsn = log_sys->lsn; 01519 01520 mutex_exit(&(log_sys->mutex)); 01521 01522 log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE); 01523 } 01524 01525 /******************************************************************** 01526 Tries to establish a big enough margin of free space in the log buffer, such 01527 that a new log entry can be catenated without an immediate need for a flush. 
*/ 01528 static 01529 void 01530 log_flush_margin(void) 01531 /*==================*/ 01532 { 01533 ibool do_flush = FALSE; 01534 log_t* log = log_sys; 01535 dulint lsn; 01536 01537 mutex_enter(&(log->mutex)); 01538 01539 if (log->buf_free > log->max_buf_free) { 01540 01541 if (log->n_pending_writes > 0) { 01542 /* A flush is running: hope that it will provide enough 01543 free space */ 01544 } else { 01545 do_flush = TRUE; 01546 lsn = log->lsn; 01547 } 01548 } 01549 01550 mutex_exit(&(log->mutex)); 01551 01552 if (do_flush) { 01553 log_write_up_to(lsn, LOG_NO_WAIT, FALSE); 01554 } 01555 } 01556 01557 /******************************************************************** 01558 Advances the smallest lsn for which there are unflushed dirty blocks in the 01559 buffer pool. NOTE: this function may only be called if the calling thread owns 01560 no synchronization objects! */ 01561 01562 ibool 01563 log_preflush_pool_modified_pages( 01564 /*=============================*/ 01565 /* out: FALSE if there was a flush batch of 01566 the same type running, which means that we 01567 could not start this flush batch */ 01568 dulint new_oldest, /* in: try to advance oldest_modified_lsn 01569 at least to this lsn */ 01570 ibool sync) /* in: TRUE if synchronous operation is 01571 desired */ 01572 { 01573 ulint n_pages; 01574 01575 if (recv_recovery_on) { 01576 /* If the recovery is running, we must first apply all 01577 log records to their respective file pages to get the 01578 right modify lsn values to these pages: otherwise, there 01579 might be pages on disk which are not yet recovered to the 01580 current lsn, and even after calling this function, we could 01581 not know how up-to-date the disk version of the database is, 01582 and we could not make a new checkpoint on the basis of the 01583 info on the buffer pool only. 
*/ 01584 01585 recv_apply_hashed_log_recs(TRUE); 01586 } 01587 01588 n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest); 01589 01590 if (sync) { 01591 buf_flush_wait_batch_end(BUF_FLUSH_LIST); 01592 } 01593 01594 if (n_pages == ULINT_UNDEFINED) { 01595 01596 return(FALSE); 01597 } 01598 01599 return(TRUE); 01600 } 01601 01602 /********************************************************** 01603 Completes a checkpoint. */ 01604 static 01605 void 01606 log_complete_checkpoint(void) 01607 /*=========================*/ 01608 { 01609 #ifdef UNIV_SYNC_DEBUG 01610 ut_ad(mutex_own(&(log_sys->mutex))); 01611 #endif /* UNIV_SYNC_DEBUG */ 01612 ut_ad(log_sys->n_pending_checkpoint_writes == 0); 01613 01614 log_sys->next_checkpoint_no 01615 = ut_dulint_add(log_sys->next_checkpoint_no, 1); 01616 01617 log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn; 01618 01619 rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT); 01620 } 01621 01622 /********************************************************** 01623 Completes an asynchronous checkpoint info write i/o to a log file. */ 01624 static 01625 void 01626 log_io_complete_checkpoint(void) 01627 /*============================*/ 01628 { 01629 mutex_enter(&(log_sys->mutex)); 01630 01631 ut_ad(log_sys->n_pending_checkpoint_writes > 0); 01632 01633 log_sys->n_pending_checkpoint_writes--; 01634 01635 if (log_sys->n_pending_checkpoint_writes == 0) { 01636 log_complete_checkpoint(); 01637 } 01638 01639 mutex_exit(&(log_sys->mutex)); 01640 } 01641 01642 /*********************************************************************** 01643 Writes info to a checkpoint about a log group. 
*/ 01644 static 01645 void 01646 log_checkpoint_set_nth_group_info( 01647 /*==============================*/ 01648 byte* buf, /* in: buffer for checkpoint info */ 01649 ulint n, /* in: nth slot */ 01650 ulint file_no,/* in: archived file number */ 01651 ulint offset) /* in: archived file offset */ 01652 { 01653 ut_ad(n < LOG_MAX_N_GROUPS); 01654 01655 mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY 01656 + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no); 01657 mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY 01658 + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset); 01659 } 01660 01661 /*********************************************************************** 01662 Gets info from a checkpoint about a log group. */ 01663 01664 void 01665 log_checkpoint_get_nth_group_info( 01666 /*==============================*/ 01667 byte* buf, /* in: buffer containing checkpoint info */ 01668 ulint n, /* in: nth slot */ 01669 ulint* file_no,/* out: archived file number */ 01670 ulint* offset) /* out: archived file offset */ 01671 { 01672 ut_ad(n < LOG_MAX_N_GROUPS); 01673 01674 *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY 01675 + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO); 01676 *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY 01677 + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET); 01678 } 01679 01680 /********************************************************** 01681 Writes the checkpoint info to a log group header. 
*/ 01682 static 01683 void 01684 log_group_checkpoint( 01685 /*=================*/ 01686 log_group_t* group) /* in: log group */ 01687 { 01688 log_group_t* group2; 01689 #ifdef UNIV_LOG_ARCHIVE 01690 dulint archived_lsn; 01691 dulint next_archived_lsn; 01692 #endif /* UNIV_LOG_ARCHIVE */ 01693 ulint write_offset; 01694 ulint fold; 01695 byte* buf; 01696 ulint i; 01697 01698 #ifdef UNIV_SYNC_DEBUG 01699 ut_ad(mutex_own(&(log_sys->mutex))); 01700 #endif /* UNIV_SYNC_DEBUG */ 01701 #if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE 01702 # error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE" 01703 #endif 01704 01705 buf = group->checkpoint_buf; 01706 01707 mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no); 01708 mach_write_to_8(buf + LOG_CHECKPOINT_LSN, 01709 log_sys->next_checkpoint_lsn); 01710 01711 mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET, 01712 log_group_calc_lsn_offset( 01713 log_sys->next_checkpoint_lsn, group)); 01714 01715 mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size); 01716 01717 #ifdef UNIV_LOG_ARCHIVE 01718 if (log_sys->archiving_state == LOG_ARCH_OFF) { 01719 archived_lsn = ut_dulint_max; 01720 } else { 01721 archived_lsn = log_sys->archived_lsn; 01722 01723 if (0 != ut_dulint_cmp(archived_lsn, 01724 log_sys->next_archived_lsn)) { 01725 next_archived_lsn = log_sys->next_archived_lsn; 01726 /* For debugging only */ 01727 } 01728 } 01729 01730 mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn); 01731 #else /* UNIV_LOG_ARCHIVE */ 01732 mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max); 01733 #endif /* UNIV_LOG_ARCHIVE */ 01734 01735 for (i = 0; i < LOG_MAX_N_GROUPS; i++) { 01736 log_checkpoint_set_nth_group_info(buf, i, 0, 0); 01737 } 01738 01739 group2 = UT_LIST_GET_FIRST(log_sys->log_groups); 01740 01741 while (group2) { 01742 log_checkpoint_set_nth_group_info(buf, group2->id, 01743 #ifdef UNIV_LOG_ARCHIVE 01744 group2->archived_file_no, 01745 group2->archived_offset 01746 #else /* 
UNIV_LOG_ARCHIVE */ 01747 0, 0 01748 #endif /* UNIV_LOG_ARCHIVE */ 01749 ); 01750 01751 group2 = UT_LIST_GET_NEXT(log_groups, group2); 01752 } 01753 01754 fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); 01755 mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold); 01756 01757 fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, 01758 LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); 01759 mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold); 01760 01761 /* Starting from InnoDB-3.23.50, we also write info on allocated 01762 size in the tablespace */ 01763 01764 mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT, 01765 log_fsp_current_free_limit); 01766 01767 mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N, 01768 LOG_CHECKPOINT_FSP_MAGIC_N_VAL); 01769 01770 /* We alternate the physical place of the checkpoint info in the first 01771 log file */ 01772 01773 if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) { 01774 write_offset = LOG_CHECKPOINT_1; 01775 } else { 01776 write_offset = LOG_CHECKPOINT_2; 01777 } 01778 01779 if (log_do_write) { 01780 if (log_sys->n_pending_checkpoint_writes == 0) { 01781 01782 rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), 01783 LOG_CHECKPOINT); 01784 } 01785 01786 log_sys->n_pending_checkpoint_writes++; 01787 01788 log_sys->n_log_ios++; 01789 01790 /* We send as the last parameter the group machine address 01791 added with 1, as we want to distinguish between a normal log 01792 file write and a checkpoint field write */ 01793 01794 fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, 01795 write_offset / UNIV_PAGE_SIZE, 01796 write_offset % UNIV_PAGE_SIZE, 01797 OS_FILE_LOG_BLOCK_SIZE, 01798 buf, ((byte*)group + 1)); 01799 01800 ut_ad(((ulint)group & 0x1UL) == 0); 01801 } 01802 } 01803 01804 /********************************************************** 01805 Writes info to a buffer of a log group when log files are created in 01806 backup restoration. 
*/ 01807 01808 void 01809 log_reset_first_header_and_checkpoint( 01810 /*==================================*/ 01811 byte* hdr_buf,/* in: buffer which will be written to the start 01812 of the first log file */ 01813 dulint start) /* in: lsn of the start of the first log file; 01814 we pretend that there is a checkpoint at 01815 start + LOG_BLOCK_HDR_SIZE */ 01816 { 01817 ulint fold; 01818 byte* buf; 01819 dulint lsn; 01820 01821 mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0); 01822 mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start); 01823 01824 lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE); 01825 01826 /* Write the label of ibbackup --restore */ 01827 strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, 01828 "ibbackup "); 01829 ut_sprintf_timestamp( 01830 (char*) hdr_buf + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 01831 + (sizeof "ibbackup ") - 1)); 01832 buf = hdr_buf + LOG_CHECKPOINT_1; 01833 01834 mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero); 01835 mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn); 01836 01837 mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET, 01838 LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE); 01839 01840 mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024); 01841 01842 mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max); 01843 01844 fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); 01845 mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold); 01846 01847 fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, 01848 LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); 01849 mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold); 01850 01851 /* Starting from InnoDB-3.23.50, we should also write info on 01852 allocated size in the tablespace, but unfortunately we do not 01853 know it here */ 01854 } 01855 01856 /********************************************************** 01857 Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. 
*/ 01858 01859 void 01860 log_group_read_checkpoint_info( 01861 /*===========================*/ 01862 log_group_t* group, /* in: log group */ 01863 ulint field) /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */ 01864 { 01865 #ifdef UNIV_SYNC_DEBUG 01866 ut_ad(mutex_own(&(log_sys->mutex))); 01867 #endif /* UNIV_SYNC_DEBUG */ 01868 01869 log_sys->n_log_ios++; 01870 01871 fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 01872 field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, 01873 OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL); 01874 } 01875 01876 /********************************************************** 01877 Writes checkpoint info to groups. */ 01878 01879 void 01880 log_groups_write_checkpoint_info(void) 01881 /*==================================*/ 01882 { 01883 log_group_t* group; 01884 01885 #ifdef UNIV_SYNC_DEBUG 01886 ut_ad(mutex_own(&(log_sys->mutex))); 01887 #endif /* UNIV_SYNC_DEBUG */ 01888 01889 group = UT_LIST_GET_FIRST(log_sys->log_groups); 01890 01891 while (group) { 01892 log_group_checkpoint(group); 01893 01894 group = UT_LIST_GET_NEXT(log_groups, group); 01895 } 01896 } 01897 01898 /********************************************************** 01899 Makes a checkpoint. Note that this function does not flush dirty 01900 blocks from the buffer pool: it only checks what is lsn of the oldest 01901 modification in the pool, and writes information about the lsn in 01902 log files. Use log_make_checkpoint_at to flush also the pool. 
*/ 01903 01904 ibool 01905 log_checkpoint( 01906 /*===========*/ 01907 /* out: TRUE if success, FALSE if a checkpoint 01908 write was already running */ 01909 ibool sync, /* in: TRUE if synchronous operation is 01910 desired */ 01911 ibool write_always) /* in: the function normally checks if the 01912 the new checkpoint would have a greater 01913 lsn than the previous one: if not, then no 01914 physical write is done; by setting this 01915 parameter TRUE, a physical write will always be 01916 made to log files */ 01917 { 01918 dulint oldest_lsn; 01919 01920 if (recv_recovery_is_on()) { 01921 recv_apply_hashed_log_recs(TRUE); 01922 } 01923 01924 if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { 01925 fil_flush_file_spaces(FIL_TABLESPACE); 01926 } 01927 01928 mutex_enter(&(log_sys->mutex)); 01929 01930 oldest_lsn = log_buf_pool_get_oldest_modification(); 01931 01932 mutex_exit(&(log_sys->mutex)); 01933 01934 /* Because log also contains headers and dummy log records, 01935 if the buffer pool contains no dirty buffers, oldest_lsn 01936 gets the value log_sys->lsn from the previous function, 01937 and we must make sure that the log is flushed up to that 01938 lsn. If there are dirty buffers in the buffer pool, then our 01939 write-ahead-logging algorithm ensures that the log has been flushed 01940 up to oldest_lsn. 
*/ 01941 01942 log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE); 01943 01944 mutex_enter(&(log_sys->mutex)); 01945 01946 if (!write_always && ut_dulint_cmp( 01947 log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) { 01948 01949 mutex_exit(&(log_sys->mutex)); 01950 01951 return(TRUE); 01952 } 01953 01954 ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0); 01955 01956 if (log_sys->n_pending_checkpoint_writes > 0) { 01957 /* A checkpoint write is running */ 01958 01959 mutex_exit(&(log_sys->mutex)); 01960 01961 if (sync) { 01962 /* Wait for the checkpoint write to complete */ 01963 rw_lock_s_lock(&(log_sys->checkpoint_lock)); 01964 rw_lock_s_unlock(&(log_sys->checkpoint_lock)); 01965 } 01966 01967 return(FALSE); 01968 } 01969 01970 log_sys->next_checkpoint_lsn = oldest_lsn; 01971 01972 #ifdef UNIV_DEBUG 01973 if (log_debug_writes) { 01974 fprintf(stderr, "Making checkpoint no %lu at lsn %lu %lu\n", 01975 (ulong) ut_dulint_get_low(log_sys->next_checkpoint_no), 01976 (ulong) ut_dulint_get_high(oldest_lsn), 01977 (ulong) ut_dulint_get_low(oldest_lsn)); 01978 } 01979 #endif /* UNIV_DEBUG */ 01980 01981 log_groups_write_checkpoint_info(); 01982 01983 mutex_exit(&(log_sys->mutex)); 01984 01985 if (sync) { 01986 /* Wait for the checkpoint write to complete */ 01987 rw_lock_s_lock(&(log_sys->checkpoint_lock)); 01988 rw_lock_s_unlock(&(log_sys->checkpoint_lock)); 01989 } 01990 01991 return(TRUE); 01992 } 01993 01994 /******************************************************************** 01995 Makes a checkpoint at a given lsn or later. 
*/ 01996 01997 void 01998 log_make_checkpoint_at( 01999 /*===================*/ 02000 dulint lsn, /* in: make a checkpoint at this or a later 02001 lsn, if ut_dulint_max, makes a checkpoint at 02002 the latest lsn */ 02003 ibool write_always) /* in: the function normally checks if the 02004 the new checkpoint would have a greater 02005 lsn than the previous one: if not, then no 02006 physical write is done; by setting this 02007 parameter TRUE, a physical write will always be 02008 made to log files */ 02009 { 02010 ibool success; 02011 02012 /* Preflush pages synchronously */ 02013 02014 success = FALSE; 02015 02016 while (!success) { 02017 success = log_preflush_pool_modified_pages(lsn, TRUE); 02018 } 02019 02020 success = FALSE; 02021 02022 while (!success) { 02023 success = log_checkpoint(TRUE, write_always); 02024 } 02025 } 02026 02027 /******************************************************************** 02028 Tries to establish a big enough margin of free space in the log groups, such 02029 that a new log entry can be catenated without an immediate need for a 02030 checkpoint. NOTE: this function may only be called if the calling thread 02031 owns no synchronization objects! 
*/ 02032 static 02033 void 02034 log_checkpoint_margin(void) 02035 /*=======================*/ 02036 { 02037 log_t* log = log_sys; 02038 ulint age; 02039 ulint checkpoint_age; 02040 ulint advance; 02041 dulint oldest_lsn; 02042 ibool sync; 02043 ibool checkpoint_sync; 02044 ibool do_checkpoint; 02045 ibool success; 02046 loop: 02047 sync = FALSE; 02048 checkpoint_sync = FALSE; 02049 do_checkpoint = FALSE; 02050 02051 mutex_enter(&(log->mutex)); 02052 02053 if (log->check_flush_or_checkpoint == FALSE) { 02054 mutex_exit(&(log->mutex)); 02055 02056 return; 02057 } 02058 02059 oldest_lsn = log_buf_pool_get_oldest_modification(); 02060 02061 age = ut_dulint_minus(log->lsn, oldest_lsn); 02062 02063 if (age > log->max_modified_age_sync) { 02064 02065 /* A flush is urgent: we have to do a synchronous preflush */ 02066 02067 sync = TRUE; 02068 advance = 2 * (age - log->max_modified_age_sync); 02069 } else if (age > log->max_modified_age_async) { 02070 02071 /* A flush is not urgent: we do an asynchronous preflush */ 02072 advance = age - log->max_modified_age_async; 02073 } else { 02074 advance = 0; 02075 } 02076 02077 checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn); 02078 02079 if (checkpoint_age > log->max_checkpoint_age) { 02080 /* A checkpoint is urgent: we do it synchronously */ 02081 02082 checkpoint_sync = TRUE; 02083 02084 do_checkpoint = TRUE; 02085 02086 } else if (checkpoint_age > log->max_checkpoint_age_async) { 02087 /* A checkpoint is not urgent: do it asynchronously */ 02088 02089 do_checkpoint = TRUE; 02090 02091 log->check_flush_or_checkpoint = FALSE; 02092 } else { 02093 log->check_flush_or_checkpoint = FALSE; 02094 } 02095 02096 mutex_exit(&(log->mutex)); 02097 02098 if (advance) { 02099 dulint new_oldest = ut_dulint_add(oldest_lsn, advance); 02100 02101 success = log_preflush_pool_modified_pages(new_oldest, sync); 02102 02103 /* If the flush succeeded, this thread has done its part 02104 and can proceed. 
If it did not succeed, there was another 02105 thread doing a flush at the same time. If sync was FALSE, 02106 the flush was not urgent, and we let this thread proceed. 02107 Otherwise, we let it start from the beginning again. */ 02108 02109 if (sync && !success) { 02110 mutex_enter(&(log->mutex)); 02111 02112 log->check_flush_or_checkpoint = TRUE; 02113 02114 mutex_exit(&(log->mutex)); 02115 goto loop; 02116 } 02117 } 02118 02119 if (do_checkpoint) { 02120 log_checkpoint(checkpoint_sync, FALSE); 02121 02122 if (checkpoint_sync) { 02123 02124 goto loop; 02125 } 02126 } 02127 } 02128 02129 /********************************************************** 02130 Reads a specified log segment to a buffer. */ 02131 02132 void 02133 log_group_read_log_seg( 02134 /*===================*/ 02135 ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */ 02136 byte* buf, /* in: buffer where to read */ 02137 log_group_t* group, /* in: log group */ 02138 dulint start_lsn, /* in: read area start */ 02139 dulint end_lsn) /* in: read area end */ 02140 { 02141 ulint len; 02142 ulint source_offset; 02143 ibool sync; 02144 02145 #ifdef UNIV_SYNC_DEBUG 02146 ut_ad(mutex_own(&(log_sys->mutex))); 02147 #endif /* UNIV_SYNC_DEBUG */ 02148 02149 sync = FALSE; 02150 02151 if (type == LOG_RECOVER) { 02152 sync = TRUE; 02153 } 02154 loop: 02155 source_offset = log_group_calc_lsn_offset(start_lsn, group); 02156 02157 len = ut_dulint_minus(end_lsn, start_lsn); 02158 02159 ut_ad(len != 0); 02160 02161 if ((source_offset % group->file_size) + len > group->file_size) { 02162 02163 len = group->file_size - (source_offset % group->file_size); 02164 } 02165 02166 #ifdef UNIV_LOG_ARCHIVE 02167 if (type == LOG_ARCHIVE) { 02168 02169 log_sys->n_pending_archive_ios++; 02170 } 02171 #endif /* UNIV_LOG_ARCHIVE */ 02172 02173 log_sys->n_log_ios++; 02174 02175 fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 02176 source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE, 02177 len, buf, NULL); 02178 02179 
start_lsn = ut_dulint_add(start_lsn, len); 02180 buf += len; 02181 02182 if (ut_dulint_cmp(start_lsn, end_lsn) != 0) { 02183 02184 goto loop; 02185 } 02186 } 02187 02188 #ifdef UNIV_LOG_ARCHIVE 02189 /********************************************************** 02190 Generates an archived log file name. */ 02191 02192 void 02193 log_archived_file_name_gen( 02194 /*=======================*/ 02195 char* buf, /* in: buffer where to write */ 02196 ulint id __attribute__((unused)), 02197 /* in: group id; 02198 currently we only archive the first group */ 02199 ulint file_no)/* in: file number */ 02200 { 02201 sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no); 02202 } 02203 02204 /********************************************************** 02205 Writes a log file header to a log file space. */ 02206 static 02207 void 02208 log_group_archive_file_header_write( 02209 /*================================*/ 02210 log_group_t* group, /* in: log group */ 02211 ulint nth_file, /* in: header to the nth file in the 02212 archive log file space */ 02213 ulint file_no, /* in: archived file number */ 02214 dulint start_lsn) /* in: log file data starts at this 02215 lsn */ 02216 { 02217 byte* buf; 02218 ulint dest_offset; 02219 02220 #ifdef UNIV_SYNC_DEBUG 02221 ut_ad(mutex_own(&(log_sys->mutex))); 02222 #endif /* UNIV_SYNC_DEBUG */ 02223 02224 ut_a(nth_file < group->n_files); 02225 02226 buf = *(group->archive_file_header_bufs + nth_file); 02227 02228 mach_write_to_4(buf + LOG_GROUP_ID, group->id); 02229 mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn); 02230 mach_write_to_4(buf + LOG_FILE_NO, file_no); 02231 02232 mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE); 02233 02234 dest_offset = nth_file * group->file_size; 02235 02236 log_sys->n_log_ios++; 02237 02238 fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id, 02239 dest_offset / UNIV_PAGE_SIZE, 02240 dest_offset % UNIV_PAGE_SIZE, 02241 2 * OS_FILE_LOG_BLOCK_SIZE, 02242 buf, &log_archive_io); 
02243 } 02244 02245 /********************************************************** 02246 Writes a log file header to a completed archived log file. */ 02247 static 02248 void 02249 log_group_archive_completed_header_write( 02250 /*=====================================*/ 02251 log_group_t* group, /* in: log group */ 02252 ulint nth_file, /* in: header to the nth file in the 02253 archive log file space */ 02254 dulint end_lsn) /* in: end lsn of the file */ 02255 { 02256 byte* buf; 02257 ulint dest_offset; 02258 02259 #ifdef UNIV_SYNC_DEBUG 02260 ut_ad(mutex_own(&(log_sys->mutex))); 02261 #endif /* UNIV_SYNC_DEBUG */ 02262 ut_a(nth_file < group->n_files); 02263 02264 buf = *(group->archive_file_header_bufs + nth_file); 02265 02266 mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE); 02267 mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn); 02268 02269 dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED; 02270 02271 log_sys->n_log_ios++; 02272 02273 fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id, 02274 dest_offset / UNIV_PAGE_SIZE, 02275 dest_offset % UNIV_PAGE_SIZE, 02276 OS_FILE_LOG_BLOCK_SIZE, 02277 buf + LOG_FILE_ARCH_COMPLETED, 02278 &log_archive_io); 02279 } 02280 02281 /********************************************************** 02282 Does the archive writes for a single log group. 
*/ 02283 static 02284 void 02285 log_group_archive( 02286 /*==============*/ 02287 log_group_t* group) /* in: log group */ 02288 { 02289 os_file_t file_handle; 02290 dulint start_lsn; 02291 dulint end_lsn; 02292 char name[1024]; 02293 byte* buf; 02294 ulint len; 02295 ibool ret; 02296 ulint next_offset; 02297 ulint n_files; 02298 ulint open_mode; 02299 02300 #ifdef UNIV_SYNC_DEBUG 02301 ut_ad(mutex_own(&(log_sys->mutex))); 02302 #endif /* UNIV_SYNC_DEBUG */ 02303 02304 start_lsn = log_sys->archived_lsn; 02305 02306 ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); 02307 02308 end_lsn = log_sys->next_archived_lsn; 02309 02310 ut_a(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); 02311 02312 buf = log_sys->archive_buf; 02313 02314 n_files = 0; 02315 02316 next_offset = group->archived_offset; 02317 loop: 02318 if ((next_offset % group->file_size == 0) 02319 || (fil_space_get_size(group->archive_space_id) == 0)) { 02320 02321 /* Add the file to the archive file space; create or open the 02322 file */ 02323 02324 if (next_offset % group->file_size == 0) { 02325 open_mode = OS_FILE_CREATE; 02326 } else { 02327 open_mode = OS_FILE_OPEN; 02328 } 02329 02330 log_archived_file_name_gen(name, group->id, 02331 group->archived_file_no + n_files); 02332 02333 file_handle = os_file_create(name, open_mode, OS_FILE_AIO, 02334 OS_DATA_FILE, &ret); 02335 02336 if (!ret && (open_mode == OS_FILE_CREATE)) { 02337 file_handle = os_file_create(name, OS_FILE_OPEN, 02338 OS_FILE_AIO, OS_DATA_FILE, &ret); 02339 } 02340 02341 if (!ret) { 02342 fprintf(stderr, 02343 "InnoDB: Cannot create or open archive log file %s.\n" 02344 "InnoDB: Cannot continue operation.\n" 02345 "InnoDB: Check that the log archive directory exists,\n" 02346 "InnoDB: you have access rights to it, and\n" 02347 "InnoDB: there is space available.\n", name); 02348 exit(1); 02349 } 02350 02351 #ifdef UNIV_DEBUG 02352 if (log_debug_writes) { 02353 fprintf(stderr, "Created archive file %s\n", name); 
02354 } 02355 #endif /* UNIV_DEBUG */ 02356 02357 ret = os_file_close(file_handle); 02358 02359 ut_a(ret); 02360 02361 /* Add the archive file as a node to the space */ 02362 02363 fil_node_create(name, group->file_size / UNIV_PAGE_SIZE, 02364 group->archive_space_id, FALSE); 02365 02366 if (next_offset % group->file_size == 0) { 02367 log_group_archive_file_header_write(group, n_files, 02368 group->archived_file_no + n_files, 02369 start_lsn); 02370 02371 next_offset += LOG_FILE_HDR_SIZE; 02372 } 02373 } 02374 02375 len = ut_dulint_minus(end_lsn, start_lsn); 02376 02377 if (group->file_size < (next_offset % group->file_size) + len) { 02378 02379 len = group->file_size - (next_offset % group->file_size); 02380 } 02381 02382 #ifdef UNIV_DEBUG 02383 if (log_debug_writes) { 02384 fprintf(stderr, 02385 "Archiving starting at lsn %lu %lu, len %lu to group %lu\n", 02386 (ulong) ut_dulint_get_high(start_lsn), 02387 (ulong) ut_dulint_get_low(start_lsn), 02388 (ulong) len, (ulong) group->id); 02389 } 02390 #endif /* UNIV_DEBUG */ 02391 02392 log_sys->n_pending_archive_ios++; 02393 02394 log_sys->n_log_ios++; 02395 02396 fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id, 02397 next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE, 02398 ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf, 02399 &log_archive_io); 02400 02401 start_lsn = ut_dulint_add(start_lsn, len); 02402 next_offset += len; 02403 buf += len; 02404 02405 if (next_offset % group->file_size == 0) { 02406 n_files++; 02407 } 02408 02409 if (ut_dulint_cmp(end_lsn, start_lsn) != 0) { 02410 02411 goto loop; 02412 } 02413 02414 group->next_archived_file_no = group->archived_file_no + n_files; 02415 group->next_archived_offset = next_offset % group->file_size; 02416 02417 ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0); 02418 } 02419 02420 /********************************************************* 02421 (Writes to the archive of each log group.) 
Currently, only the first 02422 group is archived. */ 02423 static 02424 void 02425 log_archive_groups(void) 02426 /*====================*/ 02427 { 02428 log_group_t* group; 02429 02430 #ifdef UNIV_SYNC_DEBUG 02431 ut_ad(mutex_own(&(log_sys->mutex))); 02432 #endif /* UNIV_SYNC_DEBUG */ 02433 02434 group = UT_LIST_GET_FIRST(log_sys->log_groups); 02435 02436 log_group_archive(group); 02437 } 02438 02439 /********************************************************* 02440 Completes the archiving write phase for (each log group), currently, 02441 the first log group. */ 02442 static 02443 void 02444 log_archive_write_complete_groups(void) 02445 /*===================================*/ 02446 { 02447 log_group_t* group; 02448 ulint end_offset; 02449 ulint trunc_files; 02450 ulint n_files; 02451 dulint start_lsn; 02452 dulint end_lsn; 02453 ulint i; 02454 02455 #ifdef UNIV_SYNC_DEBUG 02456 ut_ad(mutex_own(&(log_sys->mutex))); 02457 #endif /* UNIV_SYNC_DEBUG */ 02458 02459 group = UT_LIST_GET_FIRST(log_sys->log_groups); 02460 02461 group->archived_file_no = group->next_archived_file_no; 02462 group->archived_offset = group->next_archived_offset; 02463 02464 /* Truncate from the archive file space all but the last 02465 file, or if it has been written full, all files */ 02466 02467 n_files = (UNIV_PAGE_SIZE 02468 * fil_space_get_size(group->archive_space_id)) 02469 / group->file_size; 02470 ut_ad(n_files > 0); 02471 02472 end_offset = group->archived_offset; 02473 02474 if (end_offset % group->file_size == 0) { 02475 02476 trunc_files = n_files; 02477 } else { 02478 trunc_files = n_files - 1; 02479 } 02480 02481 #ifdef UNIV_DEBUG 02482 if (log_debug_writes && trunc_files) { 02483 fprintf(stderr, 02484 "Complete file(s) archived to group %lu\n", 02485 (ulong) group->id); 02486 } 02487 #endif /* UNIV_DEBUG */ 02488 02489 /* Calculate the archive file space start lsn */ 02490 start_lsn = ut_dulint_subtract(log_sys->next_archived_lsn, 02491 end_offset - LOG_FILE_HDR_SIZE 02492 + 
trunc_files 02493 * (group->file_size - LOG_FILE_HDR_SIZE)); 02494 end_lsn = start_lsn; 02495 02496 for (i = 0; i < trunc_files; i++) { 02497 02498 end_lsn = ut_dulint_add(end_lsn, 02499 group->file_size - LOG_FILE_HDR_SIZE); 02500 02501 /* Write a notice to the headers of archived log 02502 files that the file write has been completed */ 02503 02504 log_group_archive_completed_header_write(group, i, end_lsn); 02505 } 02506 02507 fil_space_truncate_start(group->archive_space_id, 02508 trunc_files * group->file_size); 02509 02510 #ifdef UNIV_DEBUG 02511 if (log_debug_writes) { 02512 fputs("Archiving writes completed\n", stderr); 02513 } 02514 #endif /* UNIV_DEBUG */ 02515 } 02516 02517 /********************************************************** 02518 Completes an archiving i/o. */ 02519 static 02520 void 02521 log_archive_check_completion_low(void) 02522 /*==================================*/ 02523 { 02524 #ifdef UNIV_SYNC_DEBUG 02525 ut_ad(mutex_own(&(log_sys->mutex))); 02526 #endif /* UNIV_SYNC_DEBUG */ 02527 02528 if (log_sys->n_pending_archive_ios == 0 02529 && log_sys->archiving_phase == LOG_ARCHIVE_READ) { 02530 02531 #ifdef UNIV_DEBUG 02532 if (log_debug_writes) { 02533 fputs("Archiving read completed\n", stderr); 02534 } 02535 #endif /* UNIV_DEBUG */ 02536 02537 /* Archive buffer has now been read in: start archive writes */ 02538 02539 log_sys->archiving_phase = LOG_ARCHIVE_WRITE; 02540 02541 log_archive_groups(); 02542 } 02543 02544 if (log_sys->n_pending_archive_ios == 0 02545 && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) { 02546 02547 log_archive_write_complete_groups(); 02548 02549 log_sys->archived_lsn = log_sys->next_archived_lsn; 02550 02551 rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE); 02552 } 02553 } 02554 02555 /********************************************************** 02556 Completes an archiving i/o. 
*/ 02557 static 02558 void 02559 log_io_complete_archive(void) 02560 /*=========================*/ 02561 { 02562 log_group_t* group; 02563 02564 mutex_enter(&(log_sys->mutex)); 02565 02566 group = UT_LIST_GET_FIRST(log_sys->log_groups); 02567 02568 mutex_exit(&(log_sys->mutex)); 02569 02570 fil_flush(group->archive_space_id); 02571 02572 mutex_enter(&(log_sys->mutex)); 02573 02574 ut_ad(log_sys->n_pending_archive_ios > 0); 02575 02576 log_sys->n_pending_archive_ios--; 02577 02578 log_archive_check_completion_low(); 02579 02580 mutex_exit(&(log_sys->mutex)); 02581 } 02582 02583 /************************************************************************ 02584 Starts an archiving operation. */ 02585 02586 ibool 02587 log_archive_do( 02588 /*===========*/ 02589 /* out: TRUE if succeed, FALSE if an archiving 02590 operation was already running */ 02591 ibool sync, /* in: TRUE if synchronous operation is desired */ 02592 ulint* n_bytes)/* out: archive log buffer size, 0 if nothing to 02593 archive */ 02594 { 02595 ibool calc_new_limit; 02596 dulint start_lsn; 02597 dulint limit_lsn; 02598 02599 calc_new_limit = TRUE; 02600 loop: 02601 mutex_enter(&(log_sys->mutex)); 02602 02603 if (log_sys->archiving_state == LOG_ARCH_OFF) { 02604 mutex_exit(&(log_sys->mutex)); 02605 02606 *n_bytes = 0; 02607 02608 return(TRUE); 02609 02610 } else if (log_sys->archiving_state == LOG_ARCH_STOPPED 02611 || log_sys->archiving_state == LOG_ARCH_STOPPING2) { 02612 02613 mutex_exit(&(log_sys->mutex)); 02614 02615 os_event_wait(log_sys->archiving_on); 02616 02617 mutex_enter(&(log_sys->mutex)); 02618 02619 goto loop; 02620 } 02621 02622 start_lsn = log_sys->archived_lsn; 02623 02624 if (calc_new_limit) { 02625 ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0); 02626 limit_lsn = ut_dulint_add(start_lsn, 02627 log_sys->archive_buf_size); 02628 02629 *n_bytes = log_sys->archive_buf_size; 02630 02631 if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) { 02632 02633 limit_lsn = 
ut_dulint_align_down(log_sys->lsn, 02634 OS_FILE_LOG_BLOCK_SIZE); 02635 } 02636 } 02637 02638 if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) { 02639 02640 mutex_exit(&(log_sys->mutex)); 02641 02642 *n_bytes = 0; 02643 02644 return(TRUE); 02645 } 02646 02647 if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) { 02648 02649 mutex_exit(&(log_sys->mutex)); 02650 02651 log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE); 02652 02653 calc_new_limit = FALSE; 02654 02655 goto loop; 02656 } 02657 02658 if (log_sys->n_pending_archive_ios > 0) { 02659 /* An archiving operation is running */ 02660 02661 mutex_exit(&(log_sys->mutex)); 02662 02663 if (sync) { 02664 rw_lock_s_lock(&(log_sys->archive_lock)); 02665 rw_lock_s_unlock(&(log_sys->archive_lock)); 02666 } 02667 02668 *n_bytes = log_sys->archive_buf_size; 02669 02670 return(FALSE); 02671 } 02672 02673 rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE); 02674 02675 log_sys->archiving_phase = LOG_ARCHIVE_READ; 02676 02677 log_sys->next_archived_lsn = limit_lsn; 02678 02679 #ifdef UNIV_DEBUG 02680 if (log_debug_writes) { 02681 fprintf(stderr, 02682 "Archiving from lsn %lu %lu to lsn %lu %lu\n", 02683 (ulong) ut_dulint_get_high(log_sys->archived_lsn), 02684 (ulong) ut_dulint_get_low(log_sys->archived_lsn), 02685 (ulong) ut_dulint_get_high(limit_lsn), 02686 (ulong) ut_dulint_get_low(limit_lsn)); 02687 } 02688 #endif /* UNIV_DEBUG */ 02689 02690 /* Read the log segment to the archive buffer */ 02691 02692 log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf, 02693 UT_LIST_GET_FIRST(log_sys->log_groups), 02694 start_lsn, limit_lsn); 02695 02696 mutex_exit(&(log_sys->mutex)); 02697 02698 if (sync) { 02699 rw_lock_s_lock(&(log_sys->archive_lock)); 02700 rw_lock_s_unlock(&(log_sys->archive_lock)); 02701 } 02702 02703 *n_bytes = log_sys->archive_buf_size; 02704 02705 return(TRUE); 02706 } 02707 02708 /******************************************************************** 02709 Writes the log contents 
to the archive at least up to the lsn when this 02710 function was called. */ 02711 static 02712 void 02713 log_archive_all(void) 02714 /*=================*/ 02715 { 02716 dulint present_lsn; 02717 ulint dummy; 02718 02719 mutex_enter(&(log_sys->mutex)); 02720 02721 if (log_sys->archiving_state == LOG_ARCH_OFF) { 02722 mutex_exit(&(log_sys->mutex)); 02723 02724 return; 02725 } 02726 02727 present_lsn = log_sys->lsn; 02728 02729 mutex_exit(&(log_sys->mutex)); 02730 02731 log_pad_current_log_block(); 02732 02733 for (;;) { 02734 mutex_enter(&(log_sys->mutex)); 02735 02736 if (ut_dulint_cmp(present_lsn, log_sys->archived_lsn) <= 0) { 02737 02738 mutex_exit(&(log_sys->mutex)); 02739 02740 return; 02741 } 02742 02743 mutex_exit(&(log_sys->mutex)); 02744 02745 log_archive_do(TRUE, &dummy); 02746 } 02747 } 02748 02749 /********************************************************* 02750 Closes the possible open archive log file (for each group) the first group, 02751 and if it was open, increments the group file count by 2, if desired. 
*/ 02752 static 02753 void 02754 log_archive_close_groups( 02755 /*=====================*/ 02756 ibool increment_file_count) /* in: TRUE if we want to increment 02757 the file count */ 02758 { 02759 log_group_t* group; 02760 ulint trunc_len; 02761 02762 #ifdef UNIV_SYNC_DEBUG 02763 ut_ad(mutex_own(&(log_sys->mutex))); 02764 #endif /* UNIV_SYNC_DEBUG */ 02765 02766 if (log_sys->archiving_state == LOG_ARCH_OFF) { 02767 02768 return; 02769 } 02770 02771 group = UT_LIST_GET_FIRST(log_sys->log_groups); 02772 02773 trunc_len = UNIV_PAGE_SIZE 02774 * fil_space_get_size(group->archive_space_id); 02775 if (trunc_len > 0) { 02776 ut_a(trunc_len == group->file_size); 02777 02778 /* Write a notice to the headers of archived log 02779 files that the file write has been completed */ 02780 02781 log_group_archive_completed_header_write(group, 02782 0, log_sys->archived_lsn); 02783 02784 fil_space_truncate_start(group->archive_space_id, 02785 trunc_len); 02786 if (increment_file_count) { 02787 group->archived_offset = 0; 02788 group->archived_file_no += 2; 02789 } 02790 02791 #ifdef UNIV_DEBUG 02792 if (log_debug_writes) { 02793 fprintf(stderr, 02794 "Incrementing arch file no to %lu in log group %lu\n", 02795 (ulong) group->archived_file_no + 2, 02796 (ulong) group->id); 02797 } 02798 #endif /* UNIV_DEBUG */ 02799 } 02800 } 02801 02802 /******************************************************************** 02803 Writes the log contents to the archive up to the lsn when this function was 02804 called, and stops the archiving. When archiving is started again, the archived 02805 log file numbers start from 2 higher, so that the archiving will not write 02806 again to the archived log files which exist when this function returns. 
*/ 02807 02808 ulint 02809 log_archive_stop(void) 02810 /*==================*/ 02811 /* out: DB_SUCCESS or DB_ERROR */ 02812 { 02813 ibool success; 02814 02815 mutex_enter(&(log_sys->mutex)); 02816 02817 if (log_sys->archiving_state != LOG_ARCH_ON) { 02818 02819 mutex_exit(&(log_sys->mutex)); 02820 02821 return(DB_ERROR); 02822 } 02823 02824 log_sys->archiving_state = LOG_ARCH_STOPPING; 02825 02826 mutex_exit(&(log_sys->mutex)); 02827 02828 log_archive_all(); 02829 02830 mutex_enter(&(log_sys->mutex)); 02831 02832 log_sys->archiving_state = LOG_ARCH_STOPPING2; 02833 os_event_reset(log_sys->archiving_on); 02834 02835 mutex_exit(&(log_sys->mutex)); 02836 02837 /* Wait for a possible archiving operation to end */ 02838 02839 rw_lock_s_lock(&(log_sys->archive_lock)); 02840 rw_lock_s_unlock(&(log_sys->archive_lock)); 02841 02842 mutex_enter(&(log_sys->mutex)); 02843 02844 /* Close all archived log files, incrementing the file count by 2, 02845 if appropriate */ 02846 02847 log_archive_close_groups(TRUE); 02848 02849 mutex_exit(&(log_sys->mutex)); 02850 02851 /* Make a checkpoint, so that if recovery is needed, the file numbers 02852 of new archived log files will start from the right value */ 02853 02854 success = FALSE; 02855 02856 while (!success) { 02857 success = log_checkpoint(TRUE, TRUE); 02858 } 02859 02860 mutex_enter(&(log_sys->mutex)); 02861 02862 log_sys->archiving_state = LOG_ARCH_STOPPED; 02863 02864 mutex_exit(&(log_sys->mutex)); 02865 02866 return(DB_SUCCESS); 02867 } 02868 02869 /******************************************************************** 02870 Starts again archiving which has been stopped. 
*/ 02871 02872 ulint 02873 log_archive_start(void) 02874 /*===================*/ 02875 /* out: DB_SUCCESS or DB_ERROR */ 02876 { 02877 mutex_enter(&(log_sys->mutex)); 02878 02879 if (log_sys->archiving_state != LOG_ARCH_STOPPED) { 02880 02881 mutex_exit(&(log_sys->mutex)); 02882 02883 return(DB_ERROR); 02884 } 02885 02886 log_sys->archiving_state = LOG_ARCH_ON; 02887 02888 os_event_set(log_sys->archiving_on); 02889 02890 mutex_exit(&(log_sys->mutex)); 02891 02892 return(DB_SUCCESS); 02893 } 02894 02895 /******************************************************************** 02896 Stop archiving the log so that a gap may occur in the archived log files. */ 02897 02898 ulint 02899 log_archive_noarchivelog(void) 02900 /*==========================*/ 02901 /* out: DB_SUCCESS or DB_ERROR */ 02902 { 02903 loop: 02904 mutex_enter(&(log_sys->mutex)); 02905 02906 if (log_sys->archiving_state == LOG_ARCH_STOPPED 02907 || log_sys->archiving_state == LOG_ARCH_OFF) { 02908 02909 log_sys->archiving_state = LOG_ARCH_OFF; 02910 02911 os_event_set(log_sys->archiving_on); 02912 02913 mutex_exit(&(log_sys->mutex)); 02914 02915 return(DB_SUCCESS); 02916 } 02917 02918 mutex_exit(&(log_sys->mutex)); 02919 02920 log_archive_stop(); 02921 02922 os_thread_sleep(500000); 02923 02924 goto loop; 02925 } 02926 02927 /******************************************************************** 02928 Start archiving the log so that a gap may occur in the archived log files. 
*/ 02929 02930 ulint 02931 log_archive_archivelog(void) 02932 /*========================*/ 02933 /* out: DB_SUCCESS or DB_ERROR */ 02934 { 02935 mutex_enter(&(log_sys->mutex)); 02936 02937 if (log_sys->archiving_state == LOG_ARCH_OFF) { 02938 02939 log_sys->archiving_state = LOG_ARCH_ON; 02940 02941 log_sys->archived_lsn = ut_dulint_align_down(log_sys->lsn, 02942 OS_FILE_LOG_BLOCK_SIZE); 02943 mutex_exit(&(log_sys->mutex)); 02944 02945 return(DB_SUCCESS); 02946 } 02947 02948 mutex_exit(&(log_sys->mutex)); 02949 02950 return(DB_ERROR); 02951 } 02952 02953 /******************************************************************** 02954 Tries to establish a big enough margin of free space in the log groups, such 02955 that a new log entry can be catenated without an immediate need for 02956 archiving. */ 02957 static 02958 void 02959 log_archive_margin(void) 02960 /*====================*/ 02961 { 02962 log_t* log = log_sys; 02963 ulint age; 02964 ibool sync; 02965 ulint dummy; 02966 loop: 02967 mutex_enter(&(log->mutex)); 02968 02969 if (log->archiving_state == LOG_ARCH_OFF) { 02970 mutex_exit(&(log->mutex)); 02971 02972 return; 02973 } 02974 02975 age = ut_dulint_minus(log->lsn, log->archived_lsn); 02976 02977 if (age > log->max_archived_lsn_age) { 02978 02979 /* An archiving is urgent: we have to do synchronous i/o */ 02980 02981 sync = TRUE; 02982 02983 } else if (age > log->max_archived_lsn_age_async) { 02984 02985 /* An archiving is not urgent: we do asynchronous i/o */ 02986 02987 sync = FALSE; 02988 } else { 02989 /* No archiving required yet */ 02990 02991 mutex_exit(&(log->mutex)); 02992 02993 return; 02994 } 02995 02996 mutex_exit(&(log->mutex)); 02997 02998 log_archive_do(sync, &dummy); 02999 03000 if (sync == TRUE) { 03001 /* Check again that enough was written to the archive */ 03002 03003 goto loop; 03004 } 03005 } 03006 #endif /* UNIV_LOG_ARCHIVE */ 03007 03008 /************************************************************************ 03009 Checks that there 
is enough free space in the log to start a new query step. 03010 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this 03011 function may only be called if the calling thread owns no synchronization 03012 objects! */ 03013 03014 void 03015 log_check_margins(void) 03016 /*===================*/ 03017 { 03018 loop: 03019 log_flush_margin(); 03020 03021 log_checkpoint_margin(); 03022 03023 #ifdef UNIV_LOG_ARCHIVE 03024 log_archive_margin(); 03025 #endif /* UNIV_LOG_ARCHIVE */ 03026 03027 mutex_enter(&(log_sys->mutex)); 03028 03029 if (log_sys->check_flush_or_checkpoint) { 03030 03031 mutex_exit(&(log_sys->mutex)); 03032 03033 goto loop; 03034 } 03035 03036 mutex_exit(&(log_sys->mutex)); 03037 } 03038 03039 /******************************************************************** 03040 Makes a checkpoint at the latest lsn and writes it to first page of each 03041 data file in the database, so that we know that the file spaces contain 03042 all modifications up to that lsn. This can only be called at database 03043 shutdown. This function also writes all log in log files to the log archive. */ 03044 03045 void 03046 logs_empty_and_mark_files_at_shutdown(void) 03047 /*=======================================*/ 03048 { 03049 dulint lsn; 03050 ulint arch_log_no; 03051 03052 if (srv_print_verbose_log) { 03053 ut_print_timestamp(stderr); 03054 fprintf(stderr, " InnoDB: Starting shutdown...\n"); 03055 } 03056 /* Wait until the master thread and all other operations are idle: our 03057 algorithm only works if the server is idle at shutdown */ 03058 03059 srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; 03060 loop: 03061 os_thread_sleep(100000); 03062 03063 mutex_enter(&kernel_mutex); 03064 03065 /* Check that there are no longer transactions. We need this wait 03066 even for the 'very fast' shutdown, because the InnoDB layer may have 03067 committed or prepared transactions and we don't want to lose 03068 them. 
*/ 03069 03070 if (trx_n_mysql_transactions > 0 03071 || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { 03072 03073 mutex_exit(&kernel_mutex); 03074 03075 goto loop; 03076 } 03077 03078 if (srv_fast_shutdown == 2) { 03079 /* In this fastest shutdown we do not flush the buffer pool: 03080 it is essentially a 'crash' of the InnoDB server. Make sure 03081 that the log is all flushed to disk, so that we can recover 03082 all committed transactions in a crash recovery. We must not 03083 write the lsn stamps to the data files, since at a startup 03084 InnoDB deduces from the stamps if the previous shutdown was 03085 clean. */ 03086 03087 log_buffer_flush_to_disk(); 03088 03089 return; /* We SKIP ALL THE REST !! */ 03090 } 03091 03092 /* Check that the master thread is suspended */ 03093 03094 if (srv_n_threads_active[SRV_MASTER] != 0) { 03095 03096 mutex_exit(&kernel_mutex); 03097 03098 goto loop; 03099 } 03100 03101 mutex_exit(&kernel_mutex); 03102 03103 mutex_enter(&(log_sys->mutex)); 03104 03105 if ( 03106 #ifdef UNIV_LOG_ARCHIVE 03107 log_sys->n_pending_archive_ios || 03108 #endif /* UNIV_LOG_ARCHIVE */ 03109 log_sys->n_pending_checkpoint_writes || 03110 log_sys->n_pending_writes) { 03111 03112 mutex_exit(&(log_sys->mutex)); 03113 03114 goto loop; 03115 } 03116 03117 mutex_exit(&(log_sys->mutex)); 03118 03119 if (!buf_pool_check_no_pending_io()) { 03120 03121 goto loop; 03122 } 03123 03124 #ifdef UNIV_LOG_ARCHIVE 03125 log_archive_all(); 03126 #endif /* UNIV_LOG_ARCHIVE */ 03127 03128 log_make_checkpoint_at(ut_dulint_max, TRUE); 03129 03130 mutex_enter(&(log_sys->mutex)); 03131 03132 lsn = log_sys->lsn; 03133 03134 if ((ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0) 03135 #ifdef UNIV_LOG_ARCHIVE 03136 || (srv_log_archive_on 03137 && ut_dulint_cmp(lsn, 03138 ut_dulint_add(log_sys->archived_lsn, 03139 LOG_BLOCK_HDR_SIZE)) 03140 != 0) 03141 #endif /* UNIV_LOG_ARCHIVE */ 03142 ) { 03143 03144 mutex_exit(&(log_sys->mutex)); 03145 03146 goto loop; 03147 } 03148 03149 
arch_log_no = 0; 03150 03151 #ifdef UNIV_LOG_ARCHIVE 03152 UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no; 03153 03154 if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) { 03155 03156 arch_log_no--; 03157 } 03158 03159 log_archive_close_groups(TRUE); 03160 #endif /* UNIV_LOG_ARCHIVE */ 03161 03162 mutex_exit(&(log_sys->mutex)); 03163 03164 mutex_enter(&kernel_mutex); 03165 /* Check that the master thread has stayed suspended */ 03166 if (srv_n_threads_active[SRV_MASTER] != 0) { 03167 fprintf(stderr, 03168 "InnoDB: Warning: the master thread woke up during shutdown\n"); 03169 03170 mutex_exit(&kernel_mutex); 03171 03172 goto loop; 03173 } 03174 mutex_exit(&kernel_mutex); 03175 03176 fil_flush_file_spaces(FIL_TABLESPACE); 03177 fil_flush_file_spaces(FIL_LOG); 03178 03179 /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer 03180 pool: therefore it is essential that the buffer pool has been 03181 completely flushed to disk! (We do not call fil_write... if the 03182 'very fast' shutdown is enabled.) 
*/ 03183 03184 if (!buf_all_freed()) { 03185 03186 goto loop; 03187 } 03188 03189 /* The lock timeout thread should now have exited */ 03190 03191 if (srv_lock_timeout_and_monitor_active) { 03192 03193 goto loop; 03194 } 03195 03196 /* We now let also the InnoDB error monitor thread to exit */ 03197 03198 srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; 03199 03200 if (srv_error_monitor_active) { 03201 03202 goto loop; 03203 } 03204 03205 /* Make some checks that the server really is quiet */ 03206 ut_a(srv_n_threads_active[SRV_MASTER] == 0); 03207 ut_a(buf_all_freed()); 03208 ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn)); 03209 03210 if (ut_dulint_cmp(lsn, srv_start_lsn) < 0) { 03211 fprintf(stderr, 03212 "InnoDB: Error: log sequence number at shutdown %lu %lu\n" 03213 "InnoDB: is lower than at startup %lu %lu!\n", 03214 (ulong) ut_dulint_get_high(lsn), 03215 (ulong) ut_dulint_get_low(lsn), 03216 (ulong) ut_dulint_get_high(srv_start_lsn), 03217 (ulong) ut_dulint_get_low(srv_start_lsn)); 03218 } 03219 03220 srv_shutdown_lsn = lsn; 03221 03222 fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); 03223 03224 fil_flush_file_spaces(FIL_TABLESPACE); 03225 03226 fil_close_all_files(); 03227 03228 /* Make some checks that the server really is quiet */ 03229 ut_a(srv_n_threads_active[SRV_MASTER] == 0); 03230 ut_a(buf_all_freed()); 03231 ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn)); 03232 } 03233 03234 /********************************************************** 03235 Checks by parsing that the catenated log segment for a single mtr is 03236 consistent. 
*/ 03237 03238 ibool 03239 log_check_log_recs( 03240 /*===============*/ 03241 byte* buf, /* in: pointer to the start of the log segment 03242 in the log_sys->buf log buffer */ 03243 ulint len, /* in: segment length in bytes */ 03244 dulint buf_start_lsn) /* in: buffer start lsn */ 03245 { 03246 dulint contiguous_lsn; 03247 dulint scanned_lsn; 03248 byte* start; 03249 byte* end; 03250 byte* buf1; 03251 byte* scan_buf; 03252 03253 #ifdef UNIV_SYNC_DEBUG 03254 ut_ad(mutex_own(&(log_sys->mutex))); 03255 #endif /* UNIV_SYNC_DEBUG */ 03256 03257 if (len == 0) { 03258 03259 return(TRUE); 03260 } 03261 03262 start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE); 03263 end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE); 03264 03265 buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE); 03266 scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE); 03267 03268 ut_memcpy(scan_buf, start, end - start); 03269 03270 recv_scan_log_recs(TRUE, 03271 (buf_pool->n_frames - 03272 recv_n_pool_free_frames) * UNIV_PAGE_SIZE, 03273 FALSE, scan_buf, end - start, 03274 ut_dulint_align_down(buf_start_lsn, 03275 OS_FILE_LOG_BLOCK_SIZE), 03276 &contiguous_lsn, &scanned_lsn); 03277 03278 ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len)) 03279 == 0); 03280 ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0); 03281 03282 mem_free(buf1); 03283 03284 return(TRUE); 03285 } 03286 03287 /********************************************************** 03288 Peeks the current lsn. 
*/ 03289 03290 ibool 03291 log_peek_lsn( 03292 /*=========*/ 03293 /* out: TRUE if success, FALSE if could not get the 03294 log system mutex */ 03295 dulint* lsn) /* out: if returns TRUE, current lsn is here */ 03296 { 03297 if (0 == mutex_enter_nowait(&(log_sys->mutex), __FILE__, __LINE__)) { 03298 *lsn = log_sys->lsn; 03299 03300 mutex_exit(&(log_sys->mutex)); 03301 03302 return(TRUE); 03303 } 03304 03305 return(FALSE); 03306 } 03307 03308 /********************************************************** 03309 Prints info of the log. */ 03310 03311 void 03312 log_print( 03313 /*======*/ 03314 FILE* file) /* in: file where to print */ 03315 { 03316 double time_elapsed; 03317 time_t current_time; 03318 03319 mutex_enter(&(log_sys->mutex)); 03320 03321 fprintf(file, 03322 "Log sequence number %lu %lu\n" 03323 "Log flushed up to %lu %lu\n" 03324 "Last checkpoint at %lu %lu\n", 03325 (ulong) ut_dulint_get_high(log_sys->lsn), 03326 (ulong) ut_dulint_get_low(log_sys->lsn), 03327 (ulong) ut_dulint_get_high(log_sys->flushed_to_disk_lsn), 03328 (ulong) ut_dulint_get_low(log_sys->flushed_to_disk_lsn), 03329 (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn), 03330 (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn)); 03331 03332 current_time = time(NULL); 03333 03334 time_elapsed = 0.001 + difftime(current_time, 03335 log_sys->last_printout_time); 03336 fprintf(file, 03337 "%lu pending log writes, %lu pending chkp writes\n" 03338 "%lu log i/o's done, %.2f log i/o's/second\n", 03339 (ulong) log_sys->n_pending_writes, 03340 (ulong) log_sys->n_pending_checkpoint_writes, 03341 (ulong) log_sys->n_log_ios, 03342 ((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed)); 03343 03344 log_sys->n_log_ios_old = log_sys->n_log_ios; 03345 log_sys->last_printout_time = current_time; 03346 03347 mutex_exit(&(log_sys->mutex)); 03348 } 03349 03350 /************************************************************************** 03351 Refreshes the statistics used to print per-second 
averages. */

void
log_refresh_stats(void)
/*===================*/
{
	/* Reset the baseline that log_print() measures against: the i/o
	count seen so far and the time of this refresh */
	log_sys->n_log_ios_old = log_sys->n_log_ios;

	log_sys->last_printout_time = time(NULL);
}
1.4.7

