00001 /****************************************************** 00002 Transaction rollback 00003 00004 (c) 1996 Innobase Oy 00005 00006 Created 3/26/1996 Heikki Tuuri 00007 *******************************************************/ 00008 00009 #include "trx0roll.h" 00010 00011 #ifdef UNIV_NONINL 00012 #include "trx0roll.ic" 00013 #endif 00014 00015 #include "fsp0fsp.h" 00016 #include "mach0data.h" 00017 #include "trx0rseg.h" 00018 #include "trx0trx.h" 00019 #include "trx0undo.h" 00020 #include "trx0rec.h" 00021 #include "que0que.h" 00022 #include "usr0sess.h" 00023 #include "srv0que.h" 00024 #include "srv0start.h" 00025 #include "row0undo.h" 00026 #include "row0mysql.h" 00027 #include "lock0lock.h" 00028 #include "pars0pars.h" 00029 00030 /* This many pages must be undone before a truncate is tried within rollback */ 00031 #define TRX_ROLL_TRUNC_THRESHOLD 1 00032 00033 /* In crash recovery, the current trx to be rolled back */ 00034 trx_t* trx_roll_crash_recv_trx = NULL; 00035 00036 /* In crash recovery we set this to the undo n:o of the current trx to be 00037 rolled back. Then we can print how many % the rollback has progressed. */ 00038 ib_longlong trx_roll_max_undo_no; 00039 00040 /* Auxiliary variable which tells the previous progress % we printed */ 00041 ulint trx_roll_progress_printed_pct; 00042 00043 /*********************************************************************** 00044 Rollback a transaction used in MySQL. 
*/ 00045 00046 int 00047 trx_general_rollback_for_mysql( 00048 /*===========================*/ 00049 /* out: error code or DB_SUCCESS */ 00050 trx_t* trx, /* in: transaction handle */ 00051 ibool partial,/* in: TRUE if partial rollback requested */ 00052 trx_savept_t* savept) /* in: pointer to savepoint undo number, if 00053 partial rollback requested */ 00054 { 00055 #ifndef UNIV_HOTBACKUP 00056 mem_heap_t* heap; 00057 que_thr_t* thr; 00058 roll_node_t* roll_node; 00059 00060 /* Tell Innobase server that there might be work for 00061 utility threads: */ 00062 00063 srv_active_wake_master_thread(); 00064 00065 trx_start_if_not_started(trx); 00066 00067 heap = mem_heap_create(512); 00068 00069 roll_node = roll_node_create(heap); 00070 00071 roll_node->partial = partial; 00072 00073 if (partial) { 00074 roll_node->savept = *savept; 00075 } 00076 00077 trx->error_state = DB_SUCCESS; 00078 00079 thr = pars_complete_graph_for_exec(roll_node, trx, heap); 00080 00081 ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); 00082 que_run_threads(thr); 00083 00084 mutex_enter(&kernel_mutex); 00085 00086 while (trx->que_state != TRX_QUE_RUNNING) { 00087 00088 mutex_exit(&kernel_mutex); 00089 00090 os_thread_sleep(100000); 00091 00092 mutex_enter(&kernel_mutex); 00093 } 00094 00095 mutex_exit(&kernel_mutex); 00096 00097 mem_heap_free(heap); 00098 00099 ut_a(trx->error_state == DB_SUCCESS); 00100 00101 /* Tell Innobase server that there might be work for 00102 utility threads: */ 00103 00104 srv_active_wake_master_thread(); 00105 00106 return((int) trx->error_state); 00107 #else /* UNIV_HOTBACKUP */ 00108 /* This function depends on MySQL code that is not included in 00109 InnoDB Hot Backup builds. Besides, this function should never 00110 be called in InnoDB Hot Backup. 
*/ 00111 ut_error; 00112 return(DB_FAIL); 00113 #endif /* UNIV_HOTBACKUP */ 00114 } 00115 00116 /*********************************************************************** 00117 Rollback a transaction used in MySQL. */ 00118 00119 int 00120 trx_rollback_for_mysql( 00121 /*===================*/ 00122 /* out: error code or DB_SUCCESS */ 00123 trx_t* trx) /* in: transaction handle */ 00124 { 00125 int err; 00126 00127 if (trx->conc_state == TRX_NOT_STARTED) { 00128 00129 return(DB_SUCCESS); 00130 } 00131 00132 trx->op_info = "rollback"; 00133 00134 err = trx_general_rollback_for_mysql(trx, FALSE, NULL); 00135 00136 trx->op_info = ""; 00137 00138 return(err); 00139 } 00140 00141 /*********************************************************************** 00142 Rollback the latest SQL statement for MySQL. */ 00143 00144 int 00145 trx_rollback_last_sql_stat_for_mysql( 00146 /*=================================*/ 00147 /* out: error code or DB_SUCCESS */ 00148 trx_t* trx) /* in: transaction handle */ 00149 { 00150 int err; 00151 00152 if (trx->conc_state == TRX_NOT_STARTED) { 00153 00154 return(DB_SUCCESS); 00155 } 00156 00157 trx->op_info = "rollback of SQL statement"; 00158 00159 err = trx_general_rollback_for_mysql(trx, TRUE, 00160 &(trx->last_sql_stat_start)); 00161 /* The following call should not be needed, but we play safe: */ 00162 trx_mark_sql_stat_end(trx); 00163 00164 trx->op_info = ""; 00165 00166 return(err); 00167 } 00168 00169 /*********************************************************************** 00170 Frees savepoint structs. 
*/ 00171 00172 void 00173 trx_roll_savepoints_free( 00174 /*=====================*/ 00175 trx_t* trx, /* in: transaction handle */ 00176 trx_named_savept_t* savep) /* in: free all savepoints > this one; 00177 if this is NULL, free all savepoints 00178 of trx */ 00179 { 00180 trx_named_savept_t* next_savep; 00181 00182 if (savep == NULL) { 00183 savep = UT_LIST_GET_FIRST(trx->trx_savepoints); 00184 } else { 00185 savep = UT_LIST_GET_NEXT(trx_savepoints, savep); 00186 } 00187 00188 while (savep != NULL) { 00189 next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep); 00190 00191 UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); 00192 mem_free(savep->name); 00193 mem_free(savep); 00194 00195 savep = next_savep; 00196 } 00197 } 00198 00199 /*********************************************************************** 00200 Rolls back a transaction back to a named savepoint. Modifications after the 00201 savepoint are undone but InnoDB does NOT release the corresponding locks 00202 which are stored in memory. If a lock is 'implicit', that is, a new inserted 00203 row holds a lock where the lock information is carried by the trx id stored in 00204 the row, these locks are naturally released in the rollback. Savepoints which 00205 were set after this savepoint are deleted. 
*/ 00206 00207 ulint 00208 trx_rollback_to_savepoint_for_mysql( 00209 /*================================*/ 00210 /* out: if no savepoint 00211 of the name found then 00212 DB_NO_SAVEPOINT, 00213 otherwise DB_SUCCESS */ 00214 trx_t* trx, /* in: transaction handle */ 00215 const char* savepoint_name, /* in: savepoint name */ 00216 ib_longlong* mysql_binlog_cache_pos) /* out: the MySQL binlog cache 00217 position corresponding to this 00218 savepoint; MySQL needs this 00219 information to remove the 00220 binlog entries of the queries 00221 executed after the savepoint */ 00222 { 00223 trx_named_savept_t* savep; 00224 ulint err; 00225 00226 savep = UT_LIST_GET_FIRST(trx->trx_savepoints); 00227 00228 while (savep != NULL) { 00229 if (0 == ut_strcmp(savep->name, savepoint_name)) { 00230 /* Found */ 00231 break; 00232 } 00233 savep = UT_LIST_GET_NEXT(trx_savepoints, savep); 00234 } 00235 00236 if (savep == NULL) { 00237 00238 return(DB_NO_SAVEPOINT); 00239 } 00240 00241 if (trx->conc_state == TRX_NOT_STARTED) { 00242 ut_print_timestamp(stderr); 00243 fputs(" InnoDB: Error: transaction has a savepoint ", stderr); 00244 ut_print_name(stderr, trx, FALSE, savep->name); 00245 fputs(" though it is not started\n", stderr); 00246 return(DB_ERROR); 00247 } 00248 00249 /* We can now free all savepoints strictly later than this one */ 00250 00251 trx_roll_savepoints_free(trx, savep); 00252 00253 *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos; 00254 00255 trx->op_info = "rollback to a savepoint"; 00256 00257 err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept)); 00258 00259 /* Store the current undo_no of the transaction so that we know where 00260 to roll back if we have to roll back the next SQL statement: */ 00261 00262 trx_mark_sql_stat_end(trx); 00263 00264 trx->op_info = ""; 00265 00266 return(err); 00267 } 00268 00269 /*********************************************************************** 00270 Creates a named savepoint. 
If the transaction is not yet started, starts it. 00271 If there is already a savepoint of the same name, this call erases that old 00272 savepoint and replaces it with a new. Savepoints are deleted in a transaction 00273 commit or rollback. */ 00274 00275 ulint 00276 trx_savepoint_for_mysql( 00277 /*====================*/ 00278 /* out: always DB_SUCCESS */ 00279 trx_t* trx, /* in: transaction handle */ 00280 const char* savepoint_name, /* in: savepoint name */ 00281 ib_longlong binlog_cache_pos) /* in: MySQL binlog cache 00282 position corresponding to this 00283 connection at the time of the 00284 savepoint */ 00285 { 00286 trx_named_savept_t* savep; 00287 00288 ut_a(trx); 00289 ut_a(savepoint_name); 00290 00291 trx_start_if_not_started(trx); 00292 00293 savep = UT_LIST_GET_FIRST(trx->trx_savepoints); 00294 00295 while (savep != NULL) { 00296 if (0 == ut_strcmp(savep->name, savepoint_name)) { 00297 /* Found */ 00298 break; 00299 } 00300 savep = UT_LIST_GET_NEXT(trx_savepoints, savep); 00301 } 00302 00303 if (savep) { 00304 /* There is a savepoint with the same name: free that */ 00305 00306 UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); 00307 00308 mem_free(savep->name); 00309 mem_free(savep); 00310 } 00311 00312 /* Create a new savepoint and add it as the last in the list */ 00313 00314 savep = mem_alloc(sizeof(trx_named_savept_t)); 00315 00316 savep->name = mem_strdup(savepoint_name); 00317 00318 savep->savept = trx_savept_take(trx); 00319 00320 savep->mysql_binlog_cache_pos = binlog_cache_pos; 00321 00322 UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep); 00323 00324 return(DB_SUCCESS); 00325 } 00326 00327 /*********************************************************************** 00328 Releases a named savepoint. Savepoints which 00329 were set after this savepoint are deleted. 
*/ 00330 00331 ulint 00332 trx_release_savepoint_for_mysql( 00333 /*============================*/ 00334 /* out: if no savepoint 00335 of the name found then 00336 DB_NO_SAVEPOINT, 00337 otherwise DB_SUCCESS */ 00338 trx_t* trx, /* in: transaction handle */ 00339 const char* savepoint_name) /* in: savepoint name */ 00340 { 00341 trx_named_savept_t* savep; 00342 00343 savep = UT_LIST_GET_FIRST(trx->trx_savepoints); 00344 00345 while (savep != NULL) { 00346 if (0 == ut_strcmp(savep->name, savepoint_name)) { 00347 /* Found */ 00348 break; 00349 } 00350 savep = UT_LIST_GET_NEXT(trx_savepoints, savep); 00351 } 00352 00353 if (savep == NULL) { 00354 00355 return(DB_NO_SAVEPOINT); 00356 } 00357 00358 /* We can now free all savepoints strictly later than this one */ 00359 00360 trx_roll_savepoints_free(trx, savep); 00361 00362 /* Now we can free this savepoint too */ 00363 00364 UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); 00365 00366 mem_free(savep->name); 00367 mem_free(savep); 00368 00369 return(DB_SUCCESS); 00370 } 00371 00372 /*********************************************************************** 00373 Returns a transaction savepoint taken at this point in time. */ 00374 00375 trx_savept_t 00376 trx_savept_take( 00377 /*============*/ 00378 /* out: savepoint */ 00379 trx_t* trx) /* in: transaction */ 00380 { 00381 trx_savept_t savept; 00382 00383 savept.least_undo_no = trx->undo_no; 00384 00385 return(savept); 00386 } 00387 00388 /*********************************************************************** 00389 Rollback or clean up transactions which have no user session. If the 00390 transaction already was committed, then we clean up a possible insert 00391 undo log. If the transaction was not yet committed, then we roll it back. 00392 Note: this is done in a background thread. 
*/ 00393 00394 os_thread_ret_t 00395 trx_rollback_or_clean_all_without_sess( 00396 /*===================================*/ 00397 /* out: a dummy parameter */ 00398 void* arg __attribute__((unused))) 00399 /* in: a dummy parameter required by 00400 os_thread_create */ 00401 { 00402 mem_heap_t* heap; 00403 que_fork_t* fork; 00404 que_thr_t* thr; 00405 roll_node_t* roll_node; 00406 trx_t* trx; 00407 dict_table_t* table; 00408 ib_longlong rows_to_undo; 00409 const char* unit = ""; 00410 int err; 00411 00412 mutex_enter(&kernel_mutex); 00413 00414 /* Open a dummy session */ 00415 00416 if (!trx_dummy_sess) { 00417 trx_dummy_sess = sess_open(); 00418 } 00419 00420 mutex_exit(&kernel_mutex); 00421 00422 if (UT_LIST_GET_FIRST(trx_sys->trx_list)) { 00423 00424 fprintf(stderr, 00425 "InnoDB: Starting in background the rollback of uncommitted transactions\n"); 00426 } else { 00427 goto leave_function; 00428 } 00429 loop: 00430 heap = mem_heap_create(512); 00431 00432 mutex_enter(&kernel_mutex); 00433 00434 trx = UT_LIST_GET_FIRST(trx_sys->trx_list); 00435 00436 while (trx) { 00437 if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) { 00438 trx = UT_LIST_GET_NEXT(trx_list, trx); 00439 } else if (trx->conc_state == TRX_PREPARED) { 00440 00441 trx->sess = trx_dummy_sess; 00442 trx = UT_LIST_GET_NEXT(trx_list, trx); 00443 } else { 00444 break; 00445 } 00446 } 00447 00448 mutex_exit(&kernel_mutex); 00449 00450 if (trx == NULL) { 00451 ut_print_timestamp(stderr); 00452 fprintf(stderr, 00453 " InnoDB: Rollback of non-prepared transactions completed\n"); 00454 00455 mem_heap_free(heap); 00456 00457 goto leave_function; 00458 } 00459 00460 trx->sess = trx_dummy_sess; 00461 00462 if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) { 00463 fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n", 00464 (ulong) ut_dulint_get_high(trx->id), 00465 (ulong) ut_dulint_get_low(trx->id)); 00466 00467 trx_cleanup_at_db_startup(trx); 00468 00469 mem_heap_free(heap); 00470 00471 goto loop; 
00472 } 00473 00474 fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap); 00475 fork->trx = trx; 00476 00477 thr = que_thr_create(fork, heap); 00478 00479 roll_node = roll_node_create(heap); 00480 00481 thr->child = roll_node; 00482 roll_node->common.parent = thr; 00483 00484 mutex_enter(&kernel_mutex); 00485 00486 trx->graph = fork; 00487 00488 ut_a(thr == que_fork_start_command(fork)); 00489 00490 trx_roll_crash_recv_trx = trx; 00491 trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no); 00492 trx_roll_progress_printed_pct = 0; 00493 rows_to_undo = trx_roll_max_undo_no; 00494 00495 if (rows_to_undo > 1000000000) { 00496 rows_to_undo = rows_to_undo / 1000000; 00497 unit = "M"; 00498 } 00499 00500 ut_print_timestamp(stderr); 00501 fprintf(stderr, 00502 " InnoDB: Rolling back trx with id %lu %lu, %lu%s rows to undo\n", 00503 (ulong) ut_dulint_get_high(trx->id), 00504 (ulong) ut_dulint_get_low(trx->id), 00505 (ulong) rows_to_undo, unit); 00506 mutex_exit(&kernel_mutex); 00507 00508 trx->mysql_thread_id = os_thread_get_curr_id(); 00509 00510 trx->mysql_process_no = os_proc_get_number(); 00511 00512 if (trx->dict_operation) { 00513 row_mysql_lock_data_dictionary(trx); 00514 } 00515 00516 que_run_threads(thr); 00517 00518 mutex_enter(&kernel_mutex); 00519 00520 while (trx->que_state != TRX_QUE_RUNNING) { 00521 00522 mutex_exit(&kernel_mutex); 00523 00524 fprintf(stderr, 00525 "InnoDB: Waiting for rollback of trx id %lu to end\n", 00526 (ulong) ut_dulint_get_low(trx->id)); 00527 os_thread_sleep(100000); 00528 00529 mutex_enter(&kernel_mutex); 00530 } 00531 00532 mutex_exit(&kernel_mutex); 00533 00534 if (trx->dict_operation) { 00535 /* If the transaction was for a dictionary operation, we 00536 drop the relevant table, if it still exists */ 00537 00538 fprintf(stderr, 00539 "InnoDB: Dropping table with id %lu %lu in recovery if it exists\n", 00540 (ulong) ut_dulint_get_high(trx->table_id), 00541 (ulong) ut_dulint_get_low(trx->table_id)); 00542 00543 
table = dict_table_get_on_id_low(trx->table_id); 00544 00545 if (table) { 00546 fputs("InnoDB: Table found: dropping table ", stderr); 00547 ut_print_name(stderr, trx, TRUE, table->name); 00548 fputs(" in recovery\n", stderr); 00549 00550 err = row_drop_table_for_mysql(table->name, trx, TRUE); 00551 00552 ut_a(err == (int) DB_SUCCESS); 00553 } 00554 } 00555 00556 if (trx->dict_operation) { 00557 row_mysql_unlock_data_dictionary(trx); 00558 } 00559 00560 fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n", 00561 (ulong) ut_dulint_get_high(trx->id), 00562 (ulong) ut_dulint_get_low(trx->id)); 00563 mem_heap_free(heap); 00564 00565 trx_roll_crash_recv_trx = NULL; 00566 00567 goto loop; 00568 00569 leave_function: 00570 /* We count the number of threads in os_thread_exit(). A created 00571 thread should always use that to exit and not use return() to exit. */ 00572 00573 os_thread_exit(NULL); 00574 00575 OS_THREAD_DUMMY_RETURN; 00576 } 00577 00578 /*********************************************************************** 00579 Creates an undo number array. */ 00580 00581 trx_undo_arr_t* 00582 trx_undo_arr_create(void) 00583 /*=====================*/ 00584 { 00585 trx_undo_arr_t* arr; 00586 mem_heap_t* heap; 00587 ulint i; 00588 00589 heap = mem_heap_create(1024); 00590 00591 arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t)); 00592 00593 arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t) 00594 * UNIV_MAX_PARALLELISM); 00595 arr->n_cells = UNIV_MAX_PARALLELISM; 00596 arr->n_used = 0; 00597 00598 arr->heap = heap; 00599 00600 for (i = 0; i < UNIV_MAX_PARALLELISM; i++) { 00601 00602 (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE; 00603 } 00604 00605 return(arr); 00606 } 00607 00608 /*********************************************************************** 00609 Frees an undo number array. 
*/ 00610 00611 void 00612 trx_undo_arr_free( 00613 /*==============*/ 00614 trx_undo_arr_t* arr) /* in: undo number array */ 00615 { 00616 ut_ad(arr->n_used == 0); 00617 00618 mem_heap_free(arr->heap); 00619 } 00620 00621 /*********************************************************************** 00622 Stores info of an undo log record to the array if it is not stored yet. */ 00623 static 00624 ibool 00625 trx_undo_arr_store_info( 00626 /*====================*/ 00627 /* out: FALSE if the record already existed in the 00628 array */ 00629 trx_t* trx, /* in: transaction */ 00630 dulint undo_no)/* in: undo number */ 00631 { 00632 trx_undo_inf_t* cell; 00633 trx_undo_inf_t* stored_here; 00634 trx_undo_arr_t* arr; 00635 ulint n_used; 00636 ulint n; 00637 ulint i; 00638 00639 n = 0; 00640 arr = trx->undo_no_arr; 00641 n_used = arr->n_used; 00642 stored_here = NULL; 00643 00644 for (i = 0;; i++) { 00645 cell = trx_undo_arr_get_nth_info(arr, i); 00646 00647 if (!cell->in_use) { 00648 if (!stored_here) { 00649 /* Not in use, we may store here */ 00650 cell->undo_no = undo_no; 00651 cell->in_use = TRUE; 00652 00653 arr->n_used++; 00654 00655 stored_here = cell; 00656 } 00657 } else { 00658 n++; 00659 00660 if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) { 00661 00662 if (stored_here) { 00663 stored_here->in_use = FALSE; 00664 ut_ad(arr->n_used > 0); 00665 arr->n_used--; 00666 } 00667 00668 ut_ad(arr->n_used == n_used); 00669 00670 return(FALSE); 00671 } 00672 } 00673 00674 if (n == n_used && stored_here) { 00675 00676 ut_ad(arr->n_used == 1 + n_used); 00677 00678 return(TRUE); 00679 } 00680 } 00681 } 00682 00683 /*********************************************************************** 00684 Removes an undo number from the array. 
*/ 00685 static 00686 void 00687 trx_undo_arr_remove_info( 00688 /*=====================*/ 00689 trx_undo_arr_t* arr, /* in: undo number array */ 00690 dulint undo_no)/* in: undo number */ 00691 { 00692 trx_undo_inf_t* cell; 00693 ulint n_used; 00694 ulint n; 00695 ulint i; 00696 00697 n_used = arr->n_used; 00698 n = 0; 00699 00700 for (i = 0;; i++) { 00701 cell = trx_undo_arr_get_nth_info(arr, i); 00702 00703 if (cell->in_use 00704 && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) { 00705 00706 cell->in_use = FALSE; 00707 00708 ut_ad(arr->n_used > 0); 00709 00710 arr->n_used--; 00711 00712 return; 00713 } 00714 } 00715 } 00716 00717 /*********************************************************************** 00718 Gets the biggest undo number in an array. */ 00719 static 00720 dulint 00721 trx_undo_arr_get_biggest( 00722 /*=====================*/ 00723 /* out: biggest value, ut_dulint_zero if 00724 the array is empty */ 00725 trx_undo_arr_t* arr) /* in: undo number array */ 00726 { 00727 trx_undo_inf_t* cell; 00728 ulint n_used; 00729 dulint biggest; 00730 ulint n; 00731 ulint i; 00732 00733 n = 0; 00734 n_used = arr->n_used; 00735 biggest = ut_dulint_zero; 00736 00737 for (i = 0;; i++) { 00738 cell = trx_undo_arr_get_nth_info(arr, i); 00739 00740 if (cell->in_use) { 00741 n++; 00742 if (ut_dulint_cmp(cell->undo_no, biggest) > 0) { 00743 00744 biggest = cell->undo_no; 00745 } 00746 } 00747 00748 if (n == n_used) { 00749 return(biggest); 00750 } 00751 } 00752 } 00753 00754 /*************************************************************************** 00755 Tries truncate the undo logs. 
*/ 00756 00757 void 00758 trx_roll_try_truncate( 00759 /*==================*/ 00760 trx_t* trx) /* in: transaction */ 00761 { 00762 trx_undo_arr_t* arr; 00763 dulint limit; 00764 dulint biggest; 00765 00766 #ifdef UNIV_SYNC_DEBUG 00767 ut_ad(mutex_own(&(trx->undo_mutex))); 00768 ut_ad(mutex_own(&((trx->rseg)->mutex))); 00769 #endif /* UNIV_SYNC_DEBUG */ 00770 00771 trx->pages_undone = 0; 00772 00773 arr = trx->undo_no_arr; 00774 00775 limit = trx->undo_no; 00776 00777 if (arr->n_used > 0) { 00778 biggest = trx_undo_arr_get_biggest(arr); 00779 00780 if (ut_dulint_cmp(biggest, limit) >= 0) { 00781 00782 limit = ut_dulint_add(biggest, 1); 00783 } 00784 } 00785 00786 if (trx->insert_undo) { 00787 trx_undo_truncate_end(trx, trx->insert_undo, limit); 00788 } 00789 00790 if (trx->update_undo) { 00791 trx_undo_truncate_end(trx, trx->update_undo, limit); 00792 } 00793 } 00794 00795 /*************************************************************************** 00796 Pops the topmost undo log record in a single undo log and updates the info 00797 about the topmost record in the undo log memory struct. 
*/ 00798 static 00799 trx_undo_rec_t* 00800 trx_roll_pop_top_rec( 00801 /*=================*/ 00802 /* out: undo log record, the page s-latched */ 00803 trx_t* trx, /* in: transaction */ 00804 trx_undo_t* undo, /* in: undo log */ 00805 mtr_t* mtr) /* in: mtr */ 00806 { 00807 page_t* undo_page; 00808 ulint offset; 00809 trx_undo_rec_t* prev_rec; 00810 page_t* prev_rec_page; 00811 00812 #ifdef UNIV_SYNC_DEBUG 00813 ut_ad(mutex_own(&(trx->undo_mutex))); 00814 #endif /* UNIV_SYNC_DEBUG */ 00815 00816 undo_page = trx_undo_page_get_s_latched(undo->space, 00817 undo->top_page_no, mtr); 00818 offset = undo->top_offset; 00819 00820 /* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n", 00821 os_thread_get_curr_id(), ut_dulint_get_low(trx->id), 00822 ut_dulint_get_low(undo->top_undo_no)); */ 00823 00824 prev_rec = trx_undo_get_prev_rec(undo_page + offset, 00825 undo->hdr_page_no, undo->hdr_offset, 00826 mtr); 00827 if (prev_rec == NULL) { 00828 00829 undo->empty = TRUE; 00830 } else { 00831 prev_rec_page = buf_frame_align(prev_rec); 00832 00833 if (prev_rec_page != undo_page) { 00834 00835 trx->pages_undone++; 00836 } 00837 00838 undo->top_page_no = buf_frame_get_page_no(prev_rec_page); 00839 undo->top_offset = prev_rec - prev_rec_page; 00840 undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); 00841 } 00842 00843 return(undo_page + offset); 00844 } 00845 00846 /************************************************************************ 00847 Pops the topmost record when the two undo logs of a transaction are seen 00848 as a single stack of records ordered by their undo numbers. Inserts the 00849 undo number of the popped undo record to the array of currently processed 00850 undo numbers in the transaction. When the query thread finishes processing 00851 of this undo record, it must be released with trx_undo_rec_release. 
*/ 00852 00853 trx_undo_rec_t* 00854 trx_roll_pop_top_rec_of_trx( 00855 /*========================*/ 00856 /* out: undo log record copied to heap, NULL 00857 if none left, or if the undo number of the 00858 top record would be less than the limit */ 00859 trx_t* trx, /* in: transaction */ 00860 dulint limit, /* in: least undo number we need */ 00861 dulint* roll_ptr,/* out: roll pointer to undo record */ 00862 mem_heap_t* heap) /* in: memory heap where copied */ 00863 { 00864 trx_undo_t* undo; 00865 trx_undo_t* ins_undo; 00866 trx_undo_t* upd_undo; 00867 trx_undo_rec_t* undo_rec; 00868 trx_undo_rec_t* undo_rec_copy; 00869 dulint undo_no; 00870 ibool is_insert; 00871 trx_rseg_t* rseg; 00872 ulint progress_pct; 00873 mtr_t mtr; 00874 00875 rseg = trx->rseg; 00876 try_again: 00877 mutex_enter(&(trx->undo_mutex)); 00878 00879 if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) { 00880 mutex_enter(&(rseg->mutex)); 00881 00882 trx_roll_try_truncate(trx); 00883 00884 mutex_exit(&(rseg->mutex)); 00885 } 00886 00887 ins_undo = trx->insert_undo; 00888 upd_undo = trx->update_undo; 00889 00890 if (!ins_undo || ins_undo->empty) { 00891 undo = upd_undo; 00892 } else if (!upd_undo || upd_undo->empty) { 00893 undo = ins_undo; 00894 } else if (ut_dulint_cmp(upd_undo->top_undo_no, 00895 ins_undo->top_undo_no) > 0) { 00896 undo = upd_undo; 00897 } else { 00898 undo = ins_undo; 00899 } 00900 00901 if (!undo || undo->empty 00902 || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) { 00903 00904 if ((trx->undo_no_arr)->n_used == 0) { 00905 /* Rollback is ending */ 00906 00907 mutex_enter(&(rseg->mutex)); 00908 00909 trx_roll_try_truncate(trx); 00910 00911 mutex_exit(&(rseg->mutex)); 00912 } 00913 00914 mutex_exit(&(trx->undo_mutex)); 00915 00916 return(NULL); 00917 } 00918 00919 if (undo == ins_undo) { 00920 is_insert = TRUE; 00921 } else { 00922 is_insert = FALSE; 00923 } 00924 00925 *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id, 00926 undo->top_page_no, 
undo->top_offset); 00927 mtr_start(&mtr); 00928 00929 undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr); 00930 00931 undo_no = trx_undo_rec_get_undo_no(undo_rec); 00932 00933 ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0); 00934 00935 /* We print rollback progress info if we are in a crash recovery 00936 and the transaction has at least 1000 row operations to undo. */ 00937 00938 if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) { 00939 00940 progress_pct = 100 - (ulint) 00941 ((ut_conv_dulint_to_longlong(undo_no) * 100) 00942 / trx_roll_max_undo_no); 00943 if (progress_pct != trx_roll_progress_printed_pct) { 00944 if (trx_roll_progress_printed_pct == 0) { 00945 fprintf(stderr, 00946 "\nInnoDB: Progress in percents: %lu", (ulong) progress_pct); 00947 } else { 00948 fprintf(stderr, 00949 " %lu", (ulong) progress_pct); 00950 } 00951 fflush(stderr); 00952 trx_roll_progress_printed_pct = progress_pct; 00953 } 00954 } 00955 00956 trx->undo_no = undo_no; 00957 00958 if (!trx_undo_arr_store_info(trx, undo_no)) { 00959 /* A query thread is already processing this undo log record */ 00960 00961 mutex_exit(&(trx->undo_mutex)); 00962 00963 mtr_commit(&mtr); 00964 00965 goto try_again; 00966 } 00967 00968 undo_rec_copy = trx_undo_rec_copy(undo_rec, heap); 00969 00970 mutex_exit(&(trx->undo_mutex)); 00971 00972 mtr_commit(&mtr); 00973 00974 return(undo_rec_copy); 00975 } 00976 00977 /************************************************************************ 00978 Reserves an undo log record for a query thread to undo. This should be 00979 called if the query thread gets the undo log record not using the pop 00980 function above. 
*/ 00981 00982 ibool 00983 trx_undo_rec_reserve( 00984 /*=================*/ 00985 /* out: TRUE if succeeded */ 00986 trx_t* trx, /* in: transaction */ 00987 dulint undo_no)/* in: undo number of the record */ 00988 { 00989 ibool ret; 00990 00991 mutex_enter(&(trx->undo_mutex)); 00992 00993 ret = trx_undo_arr_store_info(trx, undo_no); 00994 00995 mutex_exit(&(trx->undo_mutex)); 00996 00997 return(ret); 00998 } 00999 01000 /*********************************************************************** 01001 Releases a reserved undo record. */ 01002 01003 void 01004 trx_undo_rec_release( 01005 /*=================*/ 01006 trx_t* trx, /* in: transaction */ 01007 dulint undo_no)/* in: undo number */ 01008 { 01009 trx_undo_arr_t* arr; 01010 01011 mutex_enter(&(trx->undo_mutex)); 01012 01013 arr = trx->undo_no_arr; 01014 01015 trx_undo_arr_remove_info(arr, undo_no); 01016 01017 mutex_exit(&(trx->undo_mutex)); 01018 } 01019 01020 /************************************************************************* 01021 Starts a rollback operation. 
*/
/* Sets trx->roll_limit from the signal type, creates trx->undo_no_arr if it
does not exist yet, builds the undo query graph and starts its query thread.
The thread is either handed back through next_thr or put to the server task
queue. */

void
trx_rollback(
/*=========*/
	trx_t*		trx,	/* in: transaction */
	trx_sig_t*	sig,	/* in: signal starting the rollback */
	que_thr_t**	next_thr)/* in/out: next query thread to run;
				if the value which is passed in is
				a pointer to a NULL pointer, then the
				calling function can start running
				a new query thread; if the passed value is
				NULL, the parameter is ignored */
{
	que_t*		undo_graph;
	que_thr_t*	undo_thr;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));

	/* Decide how far back the rollback goes */

	switch (sig->type) {
	case TRX_SIG_TOTAL_ROLLBACK:
		trx->roll_limit = ut_dulint_zero;
		break;

	case TRX_SIG_ROLLBACK_TO_SAVEPT:
		trx->roll_limit = (sig->savept).least_undo_no;
		break;

	case TRX_SIG_ERROR_OCCURRED:
		trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
		break;

	default:
		ut_error;
	}

	ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);

	trx->pages_undone = 0;

	if (trx->undo_no_arr == NULL) {
		trx->undo_no_arr = trx_undo_arr_create();
	}

	/* Build a 'query' graph which will perform the undo operations */

	undo_graph = trx_roll_graph_build(trx);

	trx->graph = undo_graph;
	trx->que_state = TRX_QUE_ROLLING_BACK;

	undo_thr = que_fork_start_command(undo_graph);

	ut_ad(undo_thr);

	if (next_thr == NULL || *next_thr != NULL) {
		/* The caller cannot take the thread: queue it */
		srv_que_task_enqueue_low(undo_thr);
	} else {
		*next_thr = undo_thr;
	}
}

/******************************************************************** 01094 Builds an undo 'query' graph for a transaction. The actual rollback is 01095 performed by executing this query graph like a query subprocedure call. 01096 The reply about the completion of the rollback will be sent by this 01097 graph. */ 01098 01099 que_t* 01100 trx_roll_graph_build( 01101 /*=================*/ 01102 /* out, own: the query graph */ 01103 trx_t* trx) /* in: trx handle */ 01104 { 01105 mem_heap_t* heap; 01106 que_fork_t* fork; 01107 que_thr_t* thr; 01108 /* que_thr_t* thr2; */ 01109 01110 #ifdef UNIV_SYNC_DEBUG 01111 ut_ad(mutex_own(&kernel_mutex)); 01112 #endif /* UNIV_SYNC_DEBUG */ 01113 01114 heap = mem_heap_create(512); 01115 fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap); 01116 fork->trx = trx; 01117 01118 thr = que_thr_create(fork, heap); 01119 /* thr2 = que_thr_create(fork, heap); */ 01120 01121 thr->child = row_undo_node_create(trx, thr, heap); 01122 /* thr2->child = row_undo_node_create(trx, thr2, heap); */ 01123 01124 return(fork); 01125 } 01126 01127 /************************************************************************* 01128 Finishes error processing after the necessary partial rollback has been 01129 done. 
*/ 01130 static 01131 void 01132 trx_finish_error_processing( 01133 /*========================*/ 01134 trx_t* trx) /* in: transaction */ 01135 { 01136 trx_sig_t* sig; 01137 trx_sig_t* next_sig; 01138 01139 #ifdef UNIV_SYNC_DEBUG 01140 ut_ad(mutex_own(&kernel_mutex)); 01141 #endif /* UNIV_SYNC_DEBUG */ 01142 01143 sig = UT_LIST_GET_FIRST(trx->signals); 01144 01145 while (sig != NULL) { 01146 next_sig = UT_LIST_GET_NEXT(signals, sig); 01147 01148 if (sig->type == TRX_SIG_ERROR_OCCURRED) { 01149 01150 trx_sig_remove(trx, sig); 01151 } 01152 01153 sig = next_sig; 01154 } 01155 01156 trx->que_state = TRX_QUE_RUNNING; 01157 } 01158 01159 /************************************************************************* 01160 Finishes a partial rollback operation. */ 01161 static 01162 void 01163 trx_finish_partial_rollback_off_kernel( 01164 /*===================================*/ 01165 trx_t* trx, /* in: transaction */ 01166 que_thr_t** next_thr)/* in/out: next query thread to run; 01167 if the value which is passed in is a pointer 01168 to a NULL pointer, then the calling function 01169 can start running a new query thread; if this 01170 parameter is NULL, it is ignored */ 01171 { 01172 trx_sig_t* sig; 01173 01174 #ifdef UNIV_SYNC_DEBUG 01175 ut_ad(mutex_own(&kernel_mutex)); 01176 #endif /* UNIV_SYNC_DEBUG */ 01177 01178 sig = UT_LIST_GET_FIRST(trx->signals); 01179 01180 /* Remove the signal from the signal queue and send reply message 01181 to it */ 01182 01183 trx_sig_reply(sig, next_thr); 01184 trx_sig_remove(trx, sig); 01185 01186 trx->que_state = TRX_QUE_RUNNING; 01187 } 01188 01189 /******************************************************************** 01190 Finishes a transaction rollback. 
*/ 01191 01192 void 01193 trx_finish_rollback_off_kernel( 01194 /*===========================*/ 01195 que_t* graph, /* in: undo graph which can now be freed */ 01196 trx_t* trx, /* in: transaction */ 01197 que_thr_t** next_thr)/* in/out: next query thread to run; 01198 if the value which is passed in is 01199 a pointer to a NULL pointer, then the 01200 calling function can start running 01201 a new query thread; if this parameter is 01202 NULL, it is ignored */ 01203 { 01204 trx_sig_t* sig; 01205 trx_sig_t* next_sig; 01206 01207 #ifdef UNIV_SYNC_DEBUG 01208 ut_ad(mutex_own(&kernel_mutex)); 01209 #endif /* UNIV_SYNC_DEBUG */ 01210 01211 ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0); 01212 01213 /* Free the memory reserved by the undo graph */ 01214 que_graph_free(graph); 01215 01216 sig = UT_LIST_GET_FIRST(trx->signals); 01217 01218 if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) { 01219 01220 trx_finish_partial_rollback_off_kernel(trx, next_thr); 01221 01222 return; 01223 01224 } else if (sig->type == TRX_SIG_ERROR_OCCURRED) { 01225 01226 trx_finish_error_processing(trx); 01227 01228 return; 01229 } 01230 01231 #ifdef UNIV_DEBUG 01232 if (lock_print_waits) { 01233 fprintf(stderr, "Trx %lu rollback finished\n", 01234 (ulong) ut_dulint_get_low(trx->id)); 01235 } 01236 #endif /* UNIV_DEBUG */ 01237 01238 trx_commit_off_kernel(trx); 01239 01240 /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and 01241 send reply messages to them */ 01242 01243 trx->que_state = TRX_QUE_RUNNING; 01244 01245 while (sig != NULL) { 01246 next_sig = UT_LIST_GET_NEXT(signals, sig); 01247 01248 if (sig->type == TRX_SIG_TOTAL_ROLLBACK) { 01249 01250 trx_sig_reply(sig, next_thr); 01251 01252 trx_sig_remove(trx, sig); 01253 } 01254 01255 sig = next_sig; 01256 } 01257 } 01258 01259 /************************************************************************* 01260 Creates a rollback command node struct. 
*/ 01261 01262 roll_node_t* 01263 roll_node_create( 01264 /*=============*/ 01265 /* out, own: rollback node struct */ 01266 mem_heap_t* heap) /* in: mem heap where created */ 01267 { 01268 roll_node_t* node; 01269 01270 node = mem_heap_alloc(heap, sizeof(roll_node_t)); 01271 node->common.type = QUE_NODE_ROLLBACK; 01272 node->state = ROLL_NODE_SEND; 01273 01274 node->partial = FALSE; 01275 01276 return(node); 01277 } 01278 01279 /*************************************************************** 01280 Performs an execution step for a rollback command node in a query graph. */ 01281 01282 que_thr_t* 01283 trx_rollback_step( 01284 /*==============*/ 01285 /* out: query thread to run next, or NULL */ 01286 que_thr_t* thr) /* in: query thread */ 01287 { 01288 roll_node_t* node; 01289 ulint sig_no; 01290 trx_savept_t* savept; 01291 01292 node = thr->run_node; 01293 01294 ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK); 01295 01296 if (thr->prev_node == que_node_get_parent(node)) { 01297 node->state = ROLL_NODE_SEND; 01298 } 01299 01300 if (node->state == ROLL_NODE_SEND) { 01301 mutex_enter(&kernel_mutex); 01302 01303 node->state = ROLL_NODE_WAIT; 01304 01305 if (node->partial) { 01306 sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT; 01307 savept = &(node->savept); 01308 } else { 01309 sig_no = TRX_SIG_TOTAL_ROLLBACK; 01310 savept = NULL; 01311 } 01312 01313 /* Send a rollback signal to the transaction */ 01314 01315 trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr, 01316 savept, NULL); 01317 01318 thr->state = QUE_THR_SIG_REPLY_WAIT; 01319 01320 mutex_exit(&kernel_mutex); 01321 01322 return(NULL); 01323 } 01324 01325 ut_ad(node->state == ROLL_NODE_WAIT); 01326 01327 thr->run_node = que_node_get_parent(node); 01328 01329 return(thr); 01330 }
1.4.7

