00001 /****************************************************** 00002 The tablespace memory cache 00003 00004 (c) 1995 Innobase Oy 00005 00006 Created 10/25/1995 Heikki Tuuri 00007 *******************************************************/ 00008 00009 #include "fil0fil.h" 00010 00011 #include "mem0mem.h" 00012 #include "sync0sync.h" 00013 #include "hash0hash.h" 00014 #include "os0file.h" 00015 #include "os0sync.h" 00016 #include "mach0data.h" 00017 #include "ibuf0ibuf.h" 00018 #include "buf0buf.h" 00019 #include "buf0flu.h" 00020 #include "buf0lru.h" 00021 #include "log0log.h" 00022 #include "log0recv.h" 00023 #include "fsp0fsp.h" 00024 #include "srv0srv.h" 00025 #include "srv0start.h" 00026 #include "mtr0mtr.h" 00027 #include "mtr0log.h" 00028 #include "dict0dict.h" 00029 00030 00031 /* 00032 IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE 00033 ============================================= 00034 00035 The tablespace cache is responsible for providing fast read/write access to 00036 tablespaces and logs of the database. File creation and deletion is done 00037 in other modules which know more of the logic of the operation, however. 00038 00039 A tablespace consists of a chain of files. The size of the files does not 00040 have to be divisible by the database block size, because we may just leave 00041 the last incomplete block unused. When a new file is appended to the 00042 tablespace, the maximum size of the file is also specified. At the moment, 00043 we think that it is best to extend the file to its maximum size already at 00044 the creation of the file, because then we can avoid dynamically extending 00045 the file when more space is needed for the tablespace. 00046 00047 A block's position in the tablespace is specified with a 32-bit unsigned 00048 integer. 
The files in the chain are thought to be catenated, and the block 00049 corresponding to an address n is the nth block in the catenated file (where 00050 the first block is named the 0th block, and the incomplete block fragments 00051 at the end of files are not taken into account). A tablespace can be extended 00052 by appending a new file at the end of the chain. 00053 00054 Our tablespace concept is similar to the one of Oracle. 00055 00056 To acquire more speed in disk transfers, a technique called disk striping is 00057 sometimes used. This means that logical block addresses are divided in a 00058 round-robin fashion across several disks. Windows NT supports disk striping, 00059 so there we do not need to support it in the database. Disk striping is 00060 implemented in hardware in RAID disks. We conclude that it is not necessary 00061 to implement it in the database. Oracle 7 does not support disk striping, 00062 either. 00063 00064 Another trick used at some database sites is replacing tablespace files by 00065 raw disks, that is, the whole physical disk drive, or a partition of it, is 00066 opened as a single file, and it is accessed through byte offsets calculated 00067 from the start of the disk or the partition. This is recommended in some 00068 books on database tuning to achieve more speed in i/o. Using raw disk 00069 certainly prevents the OS from fragmenting disk space, but it is not clear 00070 if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file 00071 system + EIDE Conner disk only a negligible difference in speed when reading 00072 from a file, versus reading from a raw disk. 00073 00074 To have fast access to a tablespace or a log file, we put the data structures 00075 in a hash table. Each tablespace and log file is given a unique 32-bit 00076 identifier. 00077 00078 Some operating systems do not support many open files at the same time, 00079 though NT seems to tolerate at least 900 open files. 
Therefore, we put the 00080 open files in an LRU-list. If we need to open another file, we may close the 00081 file at the end of the LRU-list. When an i/o-operation is pending on a file, 00082 the file cannot be closed. We take the file nodes with pending i/o-operations 00083 out of the LRU-list and keep a count of pending operations. When an operation 00084 completes, we decrement the count and return the file node to the LRU-list if 00085 the count drops to zero. */ 00086 00087 /* When mysqld is run, the default directory "." is the mysqld datadir, 00088 but in the MySQL Embedded Server Library and ibbackup it is not the default 00089 directory, and we must set the base file path explicitly */ 00090 const char* fil_path_to_mysql_datadir = "."; 00091 00092 /* The number of fsyncs done to the log */ 00093 ulint fil_n_log_flushes = 0; 00094 00095 ulint fil_n_pending_log_flushes = 0; 00096 ulint fil_n_pending_tablespace_flushes = 0; 00097 00098 /* Null file address */ 00099 fil_addr_t fil_addr_null = {FIL_NULL, 0}; 00100 00101 /* File node of a tablespace or the log data space */ 00102 struct fil_node_struct { 00103 fil_space_t* space; /* backpointer to the space where this node 00104 belongs */ 00105 char* name; /* path to the file */ 00106 ibool open; /* TRUE if file open */ 00107 os_file_t handle; /* OS handle to the file, if file open */ 00108 ibool is_raw_disk;/* TRUE if the 'file' is actually a raw 00109 device or a raw disk partition */ 00110 ulint size; /* size of the file in database pages, 0 if 00111 not known yet; the possible last incomplete 00112 megabyte may be ignored if space == 0 */ 00113 ulint n_pending; 00114 /* count of pending i/o's on this file; 00115 closing of the file is not allowed if 00116 this is > 0 */ 00117 ulint n_pending_flushes; 00118 /* count of pending flushes on this file; 00119 closing of the file is not allowed if 00120 this is > 0 */ 00121 ib_longlong modification_counter;/* when we write to the file we 00122 increment this by 
one */ 00123 ib_longlong flush_counter;/* up to what modification_counter value 00124 we have flushed the modifications to disk */ 00125 UT_LIST_NODE_T(fil_node_t) chain; 00126 /* link field for the file chain */ 00127 UT_LIST_NODE_T(fil_node_t) LRU; 00128 /* link field for the LRU list */ 00129 ulint magic_n; 00130 }; 00131 00132 #define FIL_NODE_MAGIC_N 89389 00133 00134 /* Tablespace or log data space: let us call them by a common name space */ 00135 struct fil_space_struct { 00136 char* name; /* space name = the path to the first file in 00137 it */ 00138 ulint id; /* space id */ 00139 ib_longlong tablespace_version; 00140 /* in DISCARD/IMPORT this timestamp is used to 00141 check if we should ignore an insert buffer 00142 merge request for a page because it actually 00143 was for the previous incarnation of the 00144 space */ 00145 ibool mark; /* this is set to TRUE at database startup if 00146 the space corresponds to a table in the InnoDB 00147 data dictionary; so we can print a warning of 00148 orphaned tablespaces */ 00149 ibool stop_ios;/* TRUE if we want to rename the .ibd file of 00150 tablespace and want to stop temporarily 00151 posting of new i/o requests on the file */ 00152 ibool stop_ibuf_merges; 00153 /* we set this TRUE when we start deleting a 00154 single-table tablespace */ 00155 ibool is_being_deleted; 00156 /* this is set to TRUE when we start 00157 deleting a single-table tablespace and its 00158 file; when this flag is set no further i/o 00159 or flush requests can be placed on this space, 00160 though there may be such requests still being 00161 processed on this space */ 00162 ulint purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */ 00163 UT_LIST_BASE_NODE_T(fil_node_t) chain; 00164 /* base node for the file chain */ 00165 ulint size; /* space size in pages; 0 if a single-table 00166 tablespace whose size we do not know yet; 00167 last incomplete megabytes in data files may be 00168 ignored if space == 0 */ 00169 ulint 
n_reserved_extents; 00170 /* number of reserved free extents for 00171 ongoing operations like B-tree page split */ 00172 ulint n_pending_flushes; /* this is > 0 when flushing 00173 the tablespace to disk; dropping of the 00174 tablespace is forbidden if this is > 0 */ 00175 ulint n_pending_ibuf_merges;/* this is > 0 when merging 00176 insert buffer entries to a page so that we 00177 may need to access the ibuf bitmap page in the 00178 tablespade: dropping of the tablespace is 00179 forbidden if this is > 0 */ 00180 hash_node_t hash; /* hash chain node */ 00181 hash_node_t name_hash;/* hash chain the name_hash table */ 00182 rw_lock_t latch; /* latch protecting the file space storage 00183 allocation */ 00184 UT_LIST_NODE_T(fil_space_t) unflushed_spaces; 00185 /* list of spaces with at least one unflushed 00186 file we have written to */ 00187 ibool is_in_unflushed_spaces; /* TRUE if this space is 00188 currently in the list above */ 00189 UT_LIST_NODE_T(fil_space_t) space_list; 00190 /* list of all spaces */ 00191 ibuf_data_t* ibuf_data; 00192 /* insert buffer data */ 00193 ulint magic_n; 00194 }; 00195 00196 #define FIL_SPACE_MAGIC_N 89472 00197 00198 /* The tablespace memory cache; also the totality of logs = the log data space, 00199 is stored here; below we talk about tablespaces, but also the ib_logfiles 00200 form a 'space' and it is handled here */ 00201 00202 typedef struct fil_system_struct fil_system_t; 00203 struct fil_system_struct { 00204 mutex_t mutex; /* The mutex protecting the cache */ 00205 hash_table_t* spaces; /* The hash table of spaces in the 00206 system; they are hashed on the space 00207 id */ 00208 hash_table_t* name_hash; /* hash table based on the space 00209 name */ 00210 UT_LIST_BASE_NODE_T(fil_node_t) LRU; 00211 /* base node for the LRU list of the 00212 most recently used open files with no 00213 pending i/o's; if we start an i/o on 00214 the file, we first remove it from this 00215 list, and return it to the start of 00216 the list 
when the i/o ends; 00217 log files and the system tablespace are 00218 not put to this list: they are opened 00219 after the startup, and kept open until 00220 shutdown */ 00221 UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces; 00222 /* base node for the list of those 00223 tablespaces whose files contain 00224 unflushed writes; those spaces have 00225 at least one file node where 00226 modification_counter > flush_counter */ 00227 ulint n_open; /* number of files currently open */ 00228 ulint max_n_open; /* n_open is not allowed to exceed 00229 this */ 00230 ib_longlong modification_counter;/* when we write to a file we 00231 increment this by one */ 00232 ulint max_assigned_id;/* maximum space id in the existing 00233 tables, or assigned during the time 00234 mysqld has been up; at an InnoDB 00235 startup we scan the data dictionary 00236 and set here the maximum of the 00237 space id's of the tables there */ 00238 ib_longlong tablespace_version; 00239 /* a counter which is incremented for 00240 every space object memory creation; 00241 every space mem object gets a 00242 'timestamp' from this; in DISCARD/ 00243 IMPORT this is used to check if we 00244 should ignore an insert buffer merge 00245 request */ 00246 UT_LIST_BASE_NODE_T(fil_space_t) space_list; 00247 /* list of all file spaces */ 00248 }; 00249 00250 /* The tablespace memory cache. This variable is NULL before the module is 00251 initialized. */ 00252 fil_system_t* fil_system = NULL; 00253 00254 /* The tablespace memory cache hash table size */ 00255 #define FIL_SYSTEM_HASH_SIZE 50 /* TODO: make bigger! */ 00256 00257 00258 /************************************************************************ 00259 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! 00260 00261 Prepares a file node for i/o. Opens the file if it is closed. Updates the 00262 pending i/o's field in the node and the system appropriately. Takes the node 00263 off the LRU list if it is in the LRU list. 
The caller must hold the fil_sys 00264 mutex. */ 00265 static 00266 void 00267 fil_node_prepare_for_io( 00268 /*====================*/ 00269 fil_node_t* node, /* in: file node */ 00270 fil_system_t* system, /* in: tablespace memory cache */ 00271 fil_space_t* space); /* in: space */ 00272 /************************************************************************ 00273 Updates the data structures when an i/o operation finishes. Updates the 00274 pending i/o's field in the node appropriately. */ 00275 static 00276 void 00277 fil_node_complete_io( 00278 /*=================*/ 00279 fil_node_t* node, /* in: file node */ 00280 fil_system_t* system, /* in: tablespace memory cache */ 00281 ulint type); /* in: OS_FILE_WRITE or OS_FILE_READ; marks 00282 the node as modified if 00283 type == OS_FILE_WRITE */ 00284 /*********************************************************************** 00285 Checks if a single-table tablespace for a given table name exists in the 00286 tablespace memory cache. */ 00287 static 00288 ulint 00289 fil_get_space_id_for_table( 00290 /*=======================*/ 00291 /* out: space id, ULINT_UNDEFINED if not 00292 found */ 00293 const char* name); /* in: table name in the standard 00294 'databasename/tablename' format */ 00295 00296 00297 /*********************************************************************** 00298 Returns the version number of a tablespace, -1 if not found. 
*/ 00299 00300 ib_longlong 00301 fil_space_get_version( 00302 /*==================*/ 00303 /* out: version number, -1 if the tablespace does not 00304 exist in the memory cache */ 00305 ulint id) /* in: space id */ 00306 { 00307 fil_system_t* system = fil_system; 00308 fil_space_t* space; 00309 ib_longlong version = -1; 00310 00311 ut_ad(system); 00312 00313 mutex_enter(&(system->mutex)); 00314 00315 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 00316 00317 if (space) { 00318 version = space->tablespace_version; 00319 } 00320 00321 mutex_exit(&(system->mutex)); 00322 00323 return(version); 00324 } 00325 00326 /*********************************************************************** 00327 Returns the latch of a file space. */ 00328 00329 rw_lock_t* 00330 fil_space_get_latch( 00331 /*================*/ 00332 /* out: latch protecting storage allocation */ 00333 ulint id) /* in: space id */ 00334 { 00335 fil_system_t* system = fil_system; 00336 fil_space_t* space; 00337 00338 ut_ad(system); 00339 00340 mutex_enter(&(system->mutex)); 00341 00342 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 00343 00344 ut_a(space); 00345 00346 mutex_exit(&(system->mutex)); 00347 00348 return(&(space->latch)); 00349 } 00350 00351 /*********************************************************************** 00352 Returns the type of a file space. */ 00353 00354 ulint 00355 fil_space_get_type( 00356 /*===============*/ 00357 /* out: FIL_TABLESPACE or FIL_LOG */ 00358 ulint id) /* in: space id */ 00359 { 00360 fil_system_t* system = fil_system; 00361 fil_space_t* space; 00362 00363 ut_ad(system); 00364 00365 mutex_enter(&(system->mutex)); 00366 00367 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 00368 00369 ut_a(space); 00370 00371 mutex_exit(&(system->mutex)); 00372 00373 return(space->purpose); 00374 } 00375 00376 /*********************************************************************** 00377 Returns the ibuf data of a file space. 
*/ 00378 00379 ibuf_data_t* 00380 fil_space_get_ibuf_data( 00381 /*====================*/ 00382 /* out: ibuf data for this space */ 00383 ulint id) /* in: space id */ 00384 { 00385 fil_system_t* system = fil_system; 00386 fil_space_t* space; 00387 00388 ut_ad(system); 00389 00390 ut_a(id == 0); 00391 00392 mutex_enter(&(system->mutex)); 00393 00394 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 00395 00396 mutex_exit(&(system->mutex)); 00397 00398 ut_a(space); 00399 00400 return(space->ibuf_data); 00401 } 00402 00403 /************************************************************************** 00404 Checks if all the file nodes in a space are flushed. The caller must hold 00405 the fil_system mutex. */ 00406 static 00407 ibool 00408 fil_space_is_flushed( 00409 /*=================*/ 00410 /* out: TRUE if all are flushed */ 00411 fil_space_t* space) /* in: space */ 00412 { 00413 fil_node_t* node; 00414 00415 #ifdef UNIV_SYNC_DEBUG 00416 ut_ad(mutex_own(&(fil_system->mutex))); 00417 #endif /* UNIV_SYNC_DEBUG */ 00418 00419 node = UT_LIST_GET_FIRST(space->chain); 00420 00421 while (node) { 00422 if (node->modification_counter > node->flush_counter) { 00423 00424 return(FALSE); 00425 } 00426 00427 node = UT_LIST_GET_NEXT(chain, node); 00428 } 00429 00430 return(TRUE); 00431 } 00432 00433 /*********************************************************************** 00434 Appends a new file to the chain of files of a space. File must be closed. 
*/ 00435 00436 void 00437 fil_node_create( 00438 /*============*/ 00439 const char* name, /* in: file name (file must be closed) */ 00440 ulint size, /* in: file size in database blocks, rounded 00441 downwards to an integer */ 00442 ulint id, /* in: space id where to append */ 00443 ibool is_raw) /* in: TRUE if a raw device or 00444 a raw disk partition */ 00445 { 00446 fil_system_t* system = fil_system; 00447 fil_node_t* node; 00448 fil_space_t* space; 00449 00450 ut_a(system); 00451 ut_a(name); 00452 00453 mutex_enter(&(system->mutex)); 00454 00455 node = mem_alloc(sizeof(fil_node_t)); 00456 00457 node->name = mem_strdup(name); 00458 node->open = FALSE; 00459 00460 ut_a(!is_raw || srv_start_raw_disk_in_use); 00461 00462 node->is_raw_disk = is_raw; 00463 node->size = size; 00464 node->magic_n = FIL_NODE_MAGIC_N; 00465 node->n_pending = 0; 00466 node->n_pending_flushes = 0; 00467 00468 node->modification_counter = 0; 00469 node->flush_counter = 0; 00470 00471 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 00472 00473 if (!space) { 00474 ut_print_timestamp(stderr); 00475 fprintf(stderr, 00476 " InnoDB: Error: Could not find tablespace %lu for\n" 00477 "InnoDB: file ", (ulong) id); 00478 ut_print_filename(stderr, name); 00479 fputs(" in the tablespace memory cache.\n", stderr); 00480 mem_free(node->name); 00481 00482 mem_free(node); 00483 00484 mutex_exit(&(system->mutex)); 00485 00486 return; 00487 } 00488 00489 space->size += size; 00490 00491 node->space = space; 00492 00493 UT_LIST_ADD_LAST(chain, space->chain, node); 00494 00495 mutex_exit(&(system->mutex)); 00496 } 00497 00498 /************************************************************************ 00499 Opens a the file of a node of a tablespace. The caller must own the fil_system 00500 mutex. 
*/ 00501 static 00502 void 00503 fil_node_open_file( 00504 /*===============*/ 00505 fil_node_t* node, /* in: file node */ 00506 fil_system_t* system, /* in: tablespace memory cache */ 00507 fil_space_t* space) /* in: space */ 00508 { 00509 ib_longlong size_bytes; 00510 ulint size_low; 00511 ulint size_high; 00512 ibool ret; 00513 ibool success; 00514 #ifndef UNIV_HOTBACKUP 00515 byte* buf2; 00516 byte* page; 00517 ulint space_id; 00518 #endif /* !UNIV_HOTBACKUP */ 00519 00520 #ifdef UNIV_SYNC_DEBUG 00521 ut_ad(mutex_own(&(system->mutex))); 00522 #endif /* UNIV_SYNC_DEBUG */ 00523 ut_a(node->n_pending == 0); 00524 ut_a(node->open == FALSE); 00525 00526 if (node->size == 0) { 00527 /* It must be a single-table tablespace and we do not know the 00528 size of the file yet. First we open the file in the normal 00529 mode, no async I/O here, for simplicity. Then do some checks, 00530 and close the file again. 00531 NOTE that we could not use the simple file read function 00532 os_file_read() in Windows to read from a file opened for 00533 async I/O! */ 00534 00535 node->handle = os_file_create_simple_no_error_handling( 00536 node->name, OS_FILE_OPEN, 00537 OS_FILE_READ_ONLY, &success); 00538 if (!success) { 00539 /* The following call prints an error message */ 00540 os_file_get_last_error(TRUE); 00541 00542 ut_print_timestamp(stderr); 00543 00544 fprintf(stderr, 00545 " InnoDB: Fatal error: cannot open %s\n." 
00546 "InnoDB: Have you deleted .ibd files under a running mysqld server?\n", 00547 node->name); 00548 ut_a(0); 00549 } 00550 00551 os_file_get_size(node->handle, &size_low, &size_high); 00552 00553 size_bytes = (((ib_longlong)size_high) << 32) 00554 + (ib_longlong)size_low; 00555 #ifdef UNIV_HOTBACKUP 00556 node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); 00557 00558 #else 00559 ut_a(space->purpose != FIL_LOG); 00560 ut_a(space->id != 0); 00561 00562 if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { 00563 fprintf(stderr, 00564 "InnoDB: Error: the size of single-table tablespace file %s\n" 00565 "InnoDB: is only %lu %lu, should be at least %lu!\n", node->name, 00566 (ulong) size_high, 00567 (ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE)); 00568 00569 ut_a(0); 00570 } 00571 00572 /* Read the first page of the tablespace */ 00573 00574 buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); 00575 /* Align the memory for file i/o if we might have O_DIRECT 00576 set */ 00577 page = ut_align(buf2, UNIV_PAGE_SIZE); 00578 00579 success = os_file_read(node->handle, page, 0, 0, 00580 UNIV_PAGE_SIZE); 00581 space_id = fsp_header_get_space_id(page); 00582 00583 ut_free(buf2); 00584 00585 /* Close the file now that we have read the space id from it */ 00586 00587 os_file_close(node->handle); 00588 00589 if (space_id == ULINT_UNDEFINED || space_id == 0) { 00590 fprintf(stderr, 00591 "InnoDB: Error: tablespace id %lu in file %s is not sensible\n", 00592 (ulong) space_id, 00593 node->name); 00594 00595 ut_a(0); 00596 } 00597 00598 if (space_id != space->id) { 00599 fprintf(stderr, 00600 "InnoDB: Error: tablespace id is %lu in the data dictionary\n" 00601 "InnoDB: but in file %s it is %lu!\n", space->id, node->name, space_id); 00602 00603 ut_a(0); 00604 } 00605 00606 if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) { 00607 node->size = (ulint) ((size_bytes / (1024 * 1024)) 00608 * ((1024 * 1024) / UNIV_PAGE_SIZE)); 00609 } else { 00610 node->size = (ulint) (size_bytes / 
UNIV_PAGE_SIZE); 00611 } 00612 #endif 00613 space->size += node->size; 00614 } 00615 00616 /* printf("Opening file %s\n", node->name); */ 00617 00618 /* Open the file for reading and writing, in Windows normally in the 00619 unbuffered async I/O mode, though global variables may make 00620 os_file_create() to fall back to the normal file I/O mode. */ 00621 00622 if (space->purpose == FIL_LOG) { 00623 node->handle = os_file_create(node->name, OS_FILE_OPEN, 00624 OS_FILE_AIO, OS_LOG_FILE, &ret); 00625 } else if (node->is_raw_disk) { 00626 node->handle = os_file_create(node->name, 00627 OS_FILE_OPEN_RAW, 00628 OS_FILE_AIO, OS_DATA_FILE, &ret); 00629 } else { 00630 node->handle = os_file_create(node->name, OS_FILE_OPEN, 00631 OS_FILE_AIO, OS_DATA_FILE, &ret); 00632 } 00633 00634 ut_a(ret); 00635 00636 node->open = TRUE; 00637 00638 system->n_open++; 00639 00640 if (space->purpose == FIL_TABLESPACE && space->id != 0) { 00641 /* Put the node to the LRU list */ 00642 UT_LIST_ADD_FIRST(LRU, system->LRU, node); 00643 } 00644 } 00645 00646 /************************************************************************** 00647 Closes a file. 
*/ 00648 static 00649 void 00650 fil_node_close_file( 00651 /*================*/ 00652 fil_node_t* node, /* in: file node */ 00653 fil_system_t* system) /* in: tablespace memory cache */ 00654 { 00655 ibool ret; 00656 00657 ut_ad(node && system); 00658 #ifdef UNIV_SYNC_DEBUG 00659 ut_ad(mutex_own(&(system->mutex))); 00660 #endif /* UNIV_SYNC_DEBUG */ 00661 ut_a(node->open); 00662 ut_a(node->n_pending == 0); 00663 ut_a(node->n_pending_flushes == 0); 00664 ut_a(node->modification_counter == node->flush_counter); 00665 00666 ret = os_file_close(node->handle); 00667 ut_a(ret); 00668 00669 /* printf("Closing file %s\n", node->name); */ 00670 00671 node->open = FALSE; 00672 ut_a(system->n_open > 0); 00673 system->n_open--; 00674 00675 if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) { 00676 ut_a(UT_LIST_GET_LEN(system->LRU) > 0); 00677 00678 /* The node is in the LRU list, remove it */ 00679 UT_LIST_REMOVE(LRU, system->LRU, node); 00680 } 00681 } 00682 00683 /************************************************************************ 00684 Tries to close a file in the LRU list. The caller must hold the fil_sys 00685 mutex. 
*/ 00686 static 00687 ibool 00688 fil_try_to_close_file_in_LRU( 00689 /*=========================*/ 00690 /* out: TRUE if success, FALSE if should retry 00691 later; since i/o's generally complete in < 00692 100 ms, and as InnoDB writes at most 128 pages 00693 from the buffer pool in a batch, and then 00694 immediately flushes the files, there is a good 00695 chance that the next time we find a suitable 00696 node from the LRU list */ 00697 ibool print_info) /* in: if TRUE, prints information why it 00698 cannot close a file */ 00699 { 00700 fil_system_t* system = fil_system; 00701 fil_node_t* node; 00702 00703 #ifdef UNIV_SYNC_DEBUG 00704 ut_ad(mutex_own(&(system->mutex))); 00705 #endif /* UNIV_SYNC_DEBUG */ 00706 node = UT_LIST_GET_LAST(system->LRU); 00707 00708 if (print_info) { 00709 fprintf(stderr, 00710 "InnoDB: fil_sys open file LRU len %lu\n", (ulong) UT_LIST_GET_LEN(system->LRU)); 00711 } 00712 00713 while (node != NULL) { 00714 if (node->modification_counter == node->flush_counter 00715 && node->n_pending_flushes == 0) { 00716 00717 fil_node_close_file(node, system); 00718 00719 return(TRUE); 00720 } 00721 00722 if (print_info && node->n_pending_flushes > 0) { 00723 fputs("InnoDB: cannot close file ", stderr); 00724 ut_print_filename(stderr, node->name); 00725 fprintf(stderr, ", because n_pending_flushes %lu\n", 00726 (ulong) node->n_pending_flushes); 00727 } 00728 00729 if (print_info 00730 && node->modification_counter != node->flush_counter) { 00731 fputs("InnoDB: cannot close file ", stderr); 00732 ut_print_filename(stderr, node->name); 00733 fprintf(stderr, 00734 ", because mod_count %ld != fl_count %ld\n", 00735 (long) node->modification_counter, 00736 (long) node->flush_counter); 00737 } 00738 00739 node = UT_LIST_GET_PREV(LRU, node); 00740 } 00741 00742 return(FALSE); 00743 } 00744 00745 /*********************************************************************** 00746 Reserves the fil_system mutex and tries to make sure we can open at least one 00747 
file while holding it. This should be called before calling 00748 fil_node_prepare_for_io(), because that function may need to open a file. */ 00749 static 00750 void 00751 fil_mutex_enter_and_prepare_for_io( 00752 /*===============================*/ 00753 ulint space_id) /* in: space id */ 00754 { 00755 fil_system_t* system = fil_system; 00756 fil_space_t* space; 00757 ibool success; 00758 ibool print_info = FALSE; 00759 ulint count = 0; 00760 ulint count2 = 0; 00761 00762 #ifdef UNIV_SYNC_DEBUG 00763 ut_ad(!mutex_own(&(system->mutex))); 00764 #endif /* UNIV_SYNC_DEBUG */ 00765 retry: 00766 mutex_enter(&(system->mutex)); 00767 00768 if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { 00769 /* We keep log files and system tablespace files always open; 00770 this is important in preventing deadlocks in this module, as 00771 a page read completion often performs another read from the 00772 insert buffer. The insert buffer is in tablespace 0, and we 00773 cannot end up waiting in this function. 
*/ 00774 00775 return; 00776 } 00777 00778 if (system->n_open < system->max_n_open) { 00779 00780 return; 00781 } 00782 00783 HASH_SEARCH(hash, system->spaces, space_id, space, 00784 space->id == space_id); 00785 if (space != NULL && space->stop_ios) { 00786 /* We are going to do a rename file and want to stop new i/o's 00787 for a while */ 00788 00789 if (count2 > 20000) { 00790 fputs("InnoDB: Warning: tablespace ", stderr); 00791 ut_print_filename(stderr, space->name); 00792 fprintf(stderr, 00793 " has i/o ops stopped for a long time %lu\n", 00794 (ulong) count2); 00795 } 00796 00797 mutex_exit(&(system->mutex)); 00798 00799 os_thread_sleep(20000); 00800 00801 count2++; 00802 00803 goto retry; 00804 } 00805 00806 /* If the file is already open, no need to do anything; if the space 00807 does not exist, we handle the situation in the function which called 00808 this function */ 00809 00810 if (!space || UT_LIST_GET_FIRST(space->chain)->open) { 00811 00812 return; 00813 } 00814 00815 if (count > 1) { 00816 print_info = TRUE; 00817 } 00818 00819 /* Too many files are open, try to close some */ 00820 close_more: 00821 success = fil_try_to_close_file_in_LRU(print_info); 00822 00823 if (success && system->n_open >= system->max_n_open) { 00824 00825 goto close_more; 00826 } 00827 00828 if (system->n_open < system->max_n_open) { 00829 /* Ok */ 00830 00831 return; 00832 } 00833 00834 if (count >= 2) { 00835 ut_print_timestamp(stderr); 00836 fprintf(stderr, 00837 " InnoDB: Warning: too many (%lu) files stay open while the maximum\n" 00838 "InnoDB: allowed value would be %lu.\n" 00839 "InnoDB: You may need to raise the value of innodb_max_files_open in\n" 00840 "InnoDB: my.cnf.\n", (ulong) system->n_open, (ulong) system->max_n_open); 00841 00842 return; 00843 } 00844 00845 mutex_exit(&(system->mutex)); 00846 00847 #ifndef UNIV_HOTBACKUP 00848 /* Wake the i/o-handler threads to make sure pending i/o's are 00849 performed */ 00850 os_aio_simulated_wake_handler_threads(); 
00851 00852 os_thread_sleep(20000); 00853 #endif 00854 /* Flush tablespaces so that we can close modified files in the LRU 00855 list */ 00856 00857 fil_flush_file_spaces(FIL_TABLESPACE); 00858 00859 count++; 00860 00861 goto retry; 00862 } 00863 00864 /*********************************************************************** 00865 Frees a file node object from a tablespace memory cache. */ 00866 static 00867 void 00868 fil_node_free( 00869 /*==========*/ 00870 fil_node_t* node, /* in, own: file node */ 00871 fil_system_t* system, /* in: tablespace memory cache */ 00872 fil_space_t* space) /* in: space where the file node is chained */ 00873 { 00874 ut_ad(node && system && space); 00875 #ifdef UNIV_SYNC_DEBUG 00876 ut_ad(mutex_own(&(system->mutex))); 00877 #endif /* UNIV_SYNC_DEBUG */ 00878 ut_a(node->magic_n == FIL_NODE_MAGIC_N); 00879 ut_a(node->n_pending == 0); 00880 00881 if (node->open) { 00882 /* We fool the assertion in fil_node_close_file() to think 00883 there are no unflushed modifications in the file */ 00884 00885 node->modification_counter = node->flush_counter; 00886 00887 if (space->is_in_unflushed_spaces 00888 && fil_space_is_flushed(space)) { 00889 00890 space->is_in_unflushed_spaces = FALSE; 00891 00892 UT_LIST_REMOVE(unflushed_spaces, 00893 system->unflushed_spaces, 00894 space); 00895 } 00896 00897 fil_node_close_file(node, system); 00898 } 00899 00900 space->size -= node->size; 00901 00902 UT_LIST_REMOVE(chain, space->chain, node); 00903 00904 mem_free(node->name); 00905 mem_free(node); 00906 } 00907 00908 /******************************************************************** 00909 Drops files from the start of a file space, so that its size is cut by 00910 the amount given. 
*/ 00911 00912 void 00913 fil_space_truncate_start( 00914 /*=====================*/ 00915 ulint id, /* in: space id */ 00916 ulint trunc_len) /* in: truncate by this much; it is an error 00917 if this does not equal to the combined size of 00918 some initial files in the space */ 00919 { 00920 fil_system_t* system = fil_system; 00921 fil_node_t* node; 00922 fil_space_t* space; 00923 00924 mutex_enter(&(system->mutex)); 00925 00926 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 00927 00928 ut_a(space); 00929 00930 while (trunc_len > 0) { 00931 node = UT_LIST_GET_FIRST(space->chain); 00932 00933 ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len); 00934 00935 trunc_len -= node->size * UNIV_PAGE_SIZE; 00936 00937 fil_node_free(node, system, space); 00938 } 00939 00940 mutex_exit(&(system->mutex)); 00941 } 00942 00943 /*********************************************************************** 00944 Creates a space memory object and puts it to the tablespace memory cache. If 00945 there is an error, prints an error message to the .err log. 
*/ 00946 00947 ibool 00948 fil_space_create( 00949 /*=============*/ 00950 /* out: TRUE if success */ 00951 const char* name, /* in: space name */ 00952 ulint id, /* in: space id */ 00953 ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */ 00954 { 00955 fil_system_t* system = fil_system; 00956 fil_space_t* space; 00957 ulint namesake_id; 00958 try_again: 00959 /*printf( 00960 "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name, 00961 purpose);*/ 00962 00963 ut_a(system); 00964 ut_a(name); 00965 00966 mutex_enter(&(system->mutex)); 00967 00968 HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space, 00969 0 == strcmp(name, space->name)); 00970 if (space != NULL) { 00971 ut_print_timestamp(stderr); 00972 fprintf(stderr, 00973 " InnoDB: Warning: trying to init to the tablespace memory cache\n" 00974 "InnoDB: a tablespace %lu of name ", (ulong) id); 00975 ut_print_filename(stderr, name); 00976 fprintf(stderr, ",\n" 00977 "InnoDB: but a tablespace %lu of the same name\n" 00978 "InnoDB: already exists in the tablespace memory cache!\n", 00979 (ulong) space->id); 00980 00981 if (id == 0 || purpose != FIL_TABLESPACE) { 00982 00983 mutex_exit(&(system->mutex)); 00984 00985 return(FALSE); 00986 } 00987 00988 fprintf(stderr, 00989 "InnoDB: We assume that InnoDB did a crash recovery, and you had\n" 00990 "InnoDB: an .ibd file for which the table did not exist in the\n" 00991 "InnoDB: InnoDB internal data dictionary in the ibdata files.\n" 00992 "InnoDB: We assume that you later removed the .ibd and .frm files,\n" 00993 "InnoDB: and are now trying to recreate the table. 
We now remove the\n" 00994 "InnoDB: conflicting tablespace object from the memory cache and try\n" 00995 "InnoDB: the init again.\n"); 00996 00997 namesake_id = space->id; 00998 00999 mutex_exit(&(system->mutex)); 01000 01001 fil_space_free(namesake_id); 01002 01003 goto try_again; 01004 } 01005 01006 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 01007 01008 if (space != NULL) { 01009 fprintf(stderr, 01010 "InnoDB: Error: trying to add tablespace %lu of name ", (ulong) id); 01011 ut_print_filename(stderr, name); 01012 fprintf(stderr, "\n" 01013 "InnoDB: to the tablespace memory cache, but tablespace\n" 01014 "InnoDB: %lu of name ", (ulong) space->id); 01015 ut_print_filename(stderr, space->name); 01016 fputs(" already exists in the tablespace\n" 01017 "InnoDB: memory cache!\n", stderr); 01018 01019 mutex_exit(&(system->mutex)); 01020 01021 return(FALSE); 01022 } 01023 01024 space = mem_alloc(sizeof(fil_space_t)); 01025 01026 space->name = mem_strdup(name); 01027 space->id = id; 01028 01029 system->tablespace_version++; 01030 space->tablespace_version = system->tablespace_version; 01031 space->mark = FALSE; 01032 01033 if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) { 01034 system->max_assigned_id = id; 01035 } 01036 01037 space->stop_ios = FALSE; 01038 space->stop_ibuf_merges = FALSE; 01039 space->is_being_deleted = FALSE; 01040 space->purpose = purpose; 01041 space->size = 0; 01042 01043 space->n_reserved_extents = 0; 01044 01045 space->n_pending_flushes = 0; 01046 space->n_pending_ibuf_merges = 0; 01047 01048 UT_LIST_INIT(space->chain); 01049 space->magic_n = FIL_SPACE_MAGIC_N; 01050 01051 space->ibuf_data = NULL; 01052 01053 rw_lock_create(&space->latch, SYNC_FSP); 01054 01055 HASH_INSERT(fil_space_t, hash, system->spaces, id, space); 01056 01057 HASH_INSERT(fil_space_t, name_hash, system->name_hash, 01058 ut_fold_string(name), space); 01059 space->is_in_unflushed_spaces = FALSE; 01060 01061 UT_LIST_ADD_LAST(space_list, 
system->space_list, space); 01062 01063 mutex_exit(&(system->mutex)); 01064 01065 return(TRUE); 01066 } 01067 01068 /*********************************************************************** 01069 Assigns a new space id for a new single-table tablespace. This works simply by 01070 incrementing the global counter. If 4 billion id's is not enough, we may need 01071 to recycle id's. */ 01072 static 01073 ulint 01074 fil_assign_new_space_id(void) 01075 /*=========================*/ 01076 /* out: new tablespace id; ULINT_UNDEFINED if could 01077 not assign an id */ 01078 { 01079 fil_system_t* system = fil_system; 01080 ulint id; 01081 01082 mutex_enter(&(system->mutex)); 01083 01084 system->max_assigned_id++; 01085 01086 id = system->max_assigned_id; 01087 01088 if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { 01089 ut_print_timestamp(stderr); 01090 fprintf(stderr, 01091 "InnoDB: Warning: you are running out of new single-table tablespace id's.\n" 01092 "InnoDB: Current counter is %lu and it must not exceed %lu!\n" 01093 "InnoDB: To reset the counter to zero you have to dump all your tables and\n" 01094 "InnoDB: recreate the whole InnoDB installation.\n", (ulong) id, 01095 (ulong) SRV_LOG_SPACE_FIRST_ID); 01096 } 01097 01098 if (id >= SRV_LOG_SPACE_FIRST_ID) { 01099 ut_print_timestamp(stderr); 01100 fprintf(stderr, 01101 "InnoDB: You have run out of single-table tablespace id's!\n" 01102 "InnoDB: Current counter is %lu.\n" 01103 "InnoDB: To reset the counter to zero you have to dump all your tables and\n" 01104 "InnoDB: recreate the whole InnoDB installation.\n", (ulong) id); 01105 system->max_assigned_id--; 01106 01107 id = ULINT_UNDEFINED; 01108 } 01109 01110 mutex_exit(&(system->mutex)); 01111 01112 return(id); 01113 } 01114 01115 /*********************************************************************** 01116 Frees a space object from the tablespace memory cache. Closes the files in 01117 the chain but does not delete them. 
There must not be any pending i/o's or 01118 flushes on the files. */ 01119 01120 ibool 01121 fil_space_free( 01122 /*===========*/ 01123 /* out: TRUE if success */ 01124 ulint id) /* in: space id */ 01125 { 01126 fil_system_t* system = fil_system; 01127 fil_space_t* space; 01128 fil_space_t* namespace; 01129 fil_node_t* fil_node; 01130 01131 mutex_enter(&(system->mutex)); 01132 01133 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 01134 01135 if (!space) { 01136 ut_print_timestamp(stderr); 01137 fprintf(stderr, 01138 " InnoDB: Error: trying to remove tablespace %lu from the cache but\n" 01139 "InnoDB: it is not there.\n", (ulong) id); 01140 01141 mutex_exit(&(system->mutex)); 01142 01143 return(FALSE); 01144 } 01145 01146 HASH_DELETE(fil_space_t, hash, system->spaces, id, space); 01147 01148 HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name), 01149 namespace, 0 == strcmp(space->name, namespace->name)); 01150 ut_a(namespace); 01151 ut_a(space == namespace); 01152 01153 HASH_DELETE(fil_space_t, name_hash, system->name_hash, 01154 ut_fold_string(space->name), space); 01155 01156 if (space->is_in_unflushed_spaces) { 01157 space->is_in_unflushed_spaces = FALSE; 01158 01159 UT_LIST_REMOVE(unflushed_spaces, system->unflushed_spaces, 01160 space); 01161 } 01162 01163 UT_LIST_REMOVE(space_list, system->space_list, space); 01164 01165 ut_a(space->magic_n == FIL_SPACE_MAGIC_N); 01166 ut_a(0 == space->n_pending_flushes); 01167 01168 fil_node = UT_LIST_GET_FIRST(space->chain); 01169 01170 while (fil_node != NULL) { 01171 fil_node_free(fil_node, system, space); 01172 01173 fil_node = UT_LIST_GET_FIRST(space->chain); 01174 } 01175 01176 ut_a(0 == UT_LIST_GET_LEN(space->chain)); 01177 01178 mutex_exit(&(system->mutex)); 01179 01180 rw_lock_free(&(space->latch)); 01181 01182 mem_free(space->name); 01183 mem_free(space); 01184 01185 return(TRUE); 01186 } 01187 01188 #ifdef UNIV_HOTBACKUP 01189 
/*********************************************************************** 01190 Returns the tablespace object for a given id, or NULL if not found from the 01191 tablespace memory cache. */ 01192 static 01193 fil_space_t* 01194 fil_get_space_for_id_low( 01195 /*=====================*/ 01196 /* out: tablespace object or NULL; NOTE that you must 01197 own &(fil_system->mutex) to call this function! */ 01198 ulint id) /* in: space id */ 01199 { 01200 fil_system_t* system = fil_system; 01201 fil_space_t* space; 01202 01203 ut_ad(system); 01204 01205 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 01206 01207 return(space); 01208 } 01209 #endif 01210 01211 /*********************************************************************** 01212 Returns the size of the space in pages. The tablespace must be cached in the 01213 memory cache. */ 01214 01215 ulint 01216 fil_space_get_size( 01217 /*===============*/ 01218 /* out: space size, 0 if space not found */ 01219 ulint id) /* in: space id */ 01220 { 01221 fil_system_t* system = fil_system; 01222 fil_node_t* node; 01223 fil_space_t* space; 01224 ulint size; 01225 01226 ut_ad(system); 01227 01228 fil_mutex_enter_and_prepare_for_io(id); 01229 01230 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 01231 01232 if (space == NULL) { 01233 mutex_exit(&(system->mutex)); 01234 01235 return(0); 01236 } 01237 01238 if (space->size == 0 && space->purpose == FIL_TABLESPACE) { 01239 ut_a(id != 0); 01240 01241 ut_a(1 == UT_LIST_GET_LEN(space->chain)); 01242 01243 node = UT_LIST_GET_FIRST(space->chain); 01244 01245 /* It must be a single-table tablespace and we have not opened 01246 the file yet; the following calls will open it and update the 01247 size fields */ 01248 01249 fil_node_prepare_for_io(node, system, space); 01250 fil_node_complete_io(node, system, OS_FILE_READ); 01251 } 01252 01253 size = space->size; 01254 01255 mutex_exit(&(system->mutex)); 01256 01257 return(size); 01258 } 01259 01260 
/*********************************************************************** 01261 Checks if the pair space, page_no refers to an existing page in a tablespace 01262 file space. The tablespace must be cached in the memory cache. */ 01263 01264 ibool 01265 fil_check_adress_in_tablespace( 01266 /*===========================*/ 01267 /* out: TRUE if the address is meaningful */ 01268 ulint id, /* in: space id */ 01269 ulint page_no)/* in: page number */ 01270 { 01271 if (fil_space_get_size(id) > page_no) { 01272 01273 return(TRUE); 01274 } 01275 01276 return(FALSE); 01277 } 01278 01279 /******************************************************************** 01280 Creates a the tablespace memory cache. */ 01281 static 01282 fil_system_t* 01283 fil_system_create( 01284 /*==============*/ 01285 /* out, own: tablespace memory cache */ 01286 ulint hash_size, /* in: hash table size */ 01287 ulint max_n_open) /* in: maximum number of open files; must be 01288 > 10 */ 01289 { 01290 fil_system_t* system; 01291 01292 ut_a(hash_size > 0); 01293 ut_a(max_n_open > 0); 01294 01295 system = mem_alloc(sizeof(fil_system_t)); 01296 01297 mutex_create(&system->mutex, SYNC_ANY_LATCH); 01298 01299 system->spaces = hash_create(hash_size); 01300 system->name_hash = hash_create(hash_size); 01301 01302 UT_LIST_INIT(system->LRU); 01303 01304 system->n_open = 0; 01305 system->max_n_open = max_n_open; 01306 01307 system->modification_counter = 0; 01308 system->max_assigned_id = 0; 01309 01310 system->tablespace_version = 0; 01311 01312 UT_LIST_INIT(system->unflushed_spaces); 01313 UT_LIST_INIT(system->space_list); 01314 01315 return(system); 01316 } 01317 01318 /******************************************************************** 01319 Initializes the tablespace memory cache. 
*/ 01320 01321 void 01322 fil_init( 01323 /*=====*/ 01324 ulint max_n_open) /* in: max number of open files */ 01325 { 01326 ut_a(fil_system == NULL); 01327 01328 /*printf("Initializing the tablespace cache with max %lu open files\n", 01329 max_n_open); */ 01330 fil_system = fil_system_create(FIL_SYSTEM_HASH_SIZE, max_n_open); 01331 } 01332 01333 /*********************************************************************** 01334 Opens all log files and system tablespace data files. They stay open until the 01335 database server shutdown. This should be called at a server startup after the 01336 space objects for the log and the system tablespace have been created. The 01337 purpose of this operation is to make sure we never run out of file descriptors 01338 if we need to read from the insert buffer or to write to the log. */ 01339 01340 void 01341 fil_open_log_and_system_tablespace_files(void) 01342 /*==========================================*/ 01343 { 01344 fil_system_t* system = fil_system; 01345 fil_space_t* space; 01346 fil_node_t* node; 01347 01348 mutex_enter(&(system->mutex)); 01349 01350 space = UT_LIST_GET_FIRST(system->space_list); 01351 01352 while (space != NULL) { 01353 if (space->purpose != FIL_TABLESPACE || space->id == 0) { 01354 node = UT_LIST_GET_FIRST(space->chain); 01355 01356 while (node != NULL) { 01357 if (!node->open) { 01358 fil_node_open_file(node, system, 01359 space); 01360 } 01361 if (system->max_n_open < 10 + system->n_open) { 01362 fprintf(stderr, 01363 "InnoDB: Warning: you must raise the value of innodb_max_open_files in\n" 01364 "InnoDB: my.cnf! Remember that InnoDB keeps all log files and all system\n" 01365 "InnoDB: tablespace files open for the whole time mysqld is running, and\n" 01366 "InnoDB: needs to open also some .ibd files if the file-per-table storage\n" 01367 "InnoDB: model is used. 
Current open files %lu, max allowed open files %lu.\n", 01368 (ulong) system->n_open, 01369 (ulong) system->max_n_open); 01370 } 01371 node = UT_LIST_GET_NEXT(chain, node); 01372 } 01373 } 01374 space = UT_LIST_GET_NEXT(space_list, space); 01375 } 01376 01377 mutex_exit(&(system->mutex)); 01378 } 01379 01380 /*********************************************************************** 01381 Closes all open files. There must not be any pending i/o's or not flushed 01382 modifications in the files. */ 01383 01384 void 01385 fil_close_all_files(void) 01386 /*=====================*/ 01387 { 01388 fil_system_t* system = fil_system; 01389 fil_space_t* space; 01390 fil_node_t* node; 01391 01392 mutex_enter(&(system->mutex)); 01393 01394 space = UT_LIST_GET_FIRST(system->space_list); 01395 01396 while (space != NULL) { 01397 node = UT_LIST_GET_FIRST(space->chain); 01398 01399 while (node != NULL) { 01400 if (node->open) { 01401 fil_node_close_file(node, system); 01402 } 01403 node = UT_LIST_GET_NEXT(chain, node); 01404 } 01405 space = UT_LIST_GET_NEXT(space_list, space); 01406 } 01407 01408 mutex_exit(&(system->mutex)); 01409 } 01410 01411 /*********************************************************************** 01412 Sets the max tablespace id counter if the given number is bigger than the 01413 previous value. 
*/ 01414 01415 void 01416 fil_set_max_space_id_if_bigger( 01417 /*===========================*/ 01418 ulint max_id) /* in: maximum known id */ 01419 { 01420 fil_system_t* system = fil_system; 01421 01422 if (max_id >= SRV_LOG_SPACE_FIRST_ID) { 01423 fprintf(stderr, 01424 "InnoDB: Fatal error: max tablespace id is too high, %lu\n", (ulong) max_id); 01425 ut_a(0); 01426 } 01427 01428 mutex_enter(&(system->mutex)); 01429 01430 if (system->max_assigned_id < max_id) { 01431 01432 system->max_assigned_id = max_id; 01433 } 01434 01435 mutex_exit(&(system->mutex)); 01436 } 01437 01438 /******************************************************************** 01439 Initializes the ibuf data structure for space 0 == the system tablespace. 01440 This can be called after the file space headers have been created and the 01441 dictionary system has been initialized. */ 01442 01443 void 01444 fil_ibuf_init_at_db_start(void) 01445 /*===========================*/ 01446 { 01447 fil_space_t* space; 01448 01449 space = UT_LIST_GET_FIRST(fil_system->space_list); 01450 01451 ut_a(space); 01452 ut_a(space->purpose == FIL_TABLESPACE); 01453 01454 space->ibuf_data = ibuf_data_init_for_space(space->id); 01455 } 01456 01457 /******************************************************************** 01458 Writes the flushed lsn and the latest archived log number to the page header 01459 of the first page of a data file. 
*/ 01460 static 01461 ulint 01462 fil_write_lsn_and_arch_no_to_file( 01463 /*==============================*/ 01464 ulint space_id, /* in: space number */ 01465 ulint sum_of_sizes, /* in: combined size of previous files in 01466 space, in database pages */ 01467 dulint lsn, /* in: lsn to write */ 01468 ulint arch_log_no /* in: archived log number to write */ 01469 __attribute__((unused))) 01470 { 01471 byte* buf1; 01472 byte* buf; 01473 01474 buf1 = mem_alloc(2 * UNIV_PAGE_SIZE); 01475 buf = ut_align(buf1, UNIV_PAGE_SIZE); 01476 01477 fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); 01478 01479 mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); 01480 01481 fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); 01482 01483 return(DB_SUCCESS); 01484 } 01485 01486 /******************************************************************** 01487 Writes the flushed lsn and the latest archived log number to the page 01488 header of the first page of each data file in the system tablespace. */ 01489 01490 ulint 01491 fil_write_flushed_lsn_to_data_files( 01492 /*================================*/ 01493 /* out: DB_SUCCESS or error number */ 01494 dulint lsn, /* in: lsn to write */ 01495 ulint arch_log_no) /* in: latest archived log file number */ 01496 { 01497 fil_space_t* space; 01498 fil_node_t* node; 01499 ulint sum_of_sizes; 01500 ulint err; 01501 01502 mutex_enter(&(fil_system->mutex)); 01503 01504 space = UT_LIST_GET_FIRST(fil_system->space_list); 01505 01506 while (space) { 01507 /* We only write the lsn to all existing data files which have 01508 been open during the lifetime of the mysqld process; they are 01509 represented by the space objects in the tablespace memory 01510 cache. Note that all data files in the system tablespace 0 are 01511 always open. 
*/ 01512 01513 if (space->purpose == FIL_TABLESPACE 01514 && space->id == 0) { 01515 sum_of_sizes = 0; 01516 01517 node = UT_LIST_GET_FIRST(space->chain); 01518 while (node) { 01519 mutex_exit(&(fil_system->mutex)); 01520 01521 err = fil_write_lsn_and_arch_no_to_file( 01522 space->id, sum_of_sizes, 01523 lsn, arch_log_no); 01524 if (err != DB_SUCCESS) { 01525 01526 return(err); 01527 } 01528 01529 mutex_enter(&(fil_system->mutex)); 01530 01531 sum_of_sizes += node->size; 01532 node = UT_LIST_GET_NEXT(chain, node); 01533 } 01534 } 01535 space = UT_LIST_GET_NEXT(space_list, space); 01536 } 01537 01538 mutex_exit(&(fil_system->mutex)); 01539 01540 return(DB_SUCCESS); 01541 } 01542 01543 /*********************************************************************** 01544 Reads the flushed lsn and arch no fields from a data file at database 01545 startup. */ 01546 01547 void 01548 fil_read_flushed_lsn_and_arch_log_no( 01549 /*=================================*/ 01550 os_file_t data_file, /* in: open data file */ 01551 ibool one_read_already, /* in: TRUE if min and max parameters 01552 below already contain sensible data */ 01553 #ifdef UNIV_LOG_ARCHIVE 01554 ulint* min_arch_log_no, /* in/out: */ 01555 ulint* max_arch_log_no, /* in/out: */ 01556 #endif /* UNIV_LOG_ARCHIVE */ 01557 dulint* min_flushed_lsn, /* in/out: */ 01558 dulint* max_flushed_lsn) /* in/out: */ 01559 { 01560 byte* buf; 01561 byte* buf2; 01562 dulint flushed_lsn; 01563 01564 buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); 01565 /* Align the memory for a possible read from a raw device */ 01566 buf = ut_align(buf2, UNIV_PAGE_SIZE); 01567 01568 os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE); 01569 01570 flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN); 01571 01572 ut_free(buf2); 01573 01574 if (!one_read_already) { 01575 *min_flushed_lsn = flushed_lsn; 01576 *max_flushed_lsn = flushed_lsn; 01577 #ifdef UNIV_LOG_ARCHIVE 01578 *min_arch_log_no = arch_log_no; 01579 *max_arch_log_no = arch_log_no; 01580 
#endif /* UNIV_LOG_ARCHIVE */ 01581 return; 01582 } 01583 01584 if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) { 01585 *min_flushed_lsn = flushed_lsn; 01586 } 01587 if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) { 01588 *max_flushed_lsn = flushed_lsn; 01589 } 01590 #ifdef UNIV_LOG_ARCHIVE 01591 if (*min_arch_log_no > arch_log_no) { 01592 *min_arch_log_no = arch_log_no; 01593 } 01594 if (*max_arch_log_no < arch_log_no) { 01595 *max_arch_log_no = arch_log_no; 01596 } 01597 #endif /* UNIV_LOG_ARCHIVE */ 01598 } 01599 01600 /*================ SINGLE-TABLE TABLESPACES ==========================*/ 01601 01602 /*********************************************************************** 01603 Increments the count of pending insert buffer page merges, if space is not 01604 being deleted. */ 01605 01606 ibool 01607 fil_inc_pending_ibuf_merges( 01608 /*========================*/ 01609 /* out: TRUE if being deleted, and ibuf merges should 01610 be skipped */ 01611 ulint id) /* in: space id */ 01612 { 01613 fil_system_t* system = fil_system; 01614 fil_space_t* space; 01615 01616 mutex_enter(&(system->mutex)); 01617 01618 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 01619 01620 if (space == NULL) { 01621 fprintf(stderr, 01622 "InnoDB: Error: trying to do ibuf merge to a dropped tablespace %lu\n", 01623 (ulong) id); 01624 } 01625 01626 if (space == NULL || space->stop_ibuf_merges) { 01627 mutex_exit(&(system->mutex)); 01628 01629 return(TRUE); 01630 } 01631 01632 space->n_pending_ibuf_merges++; 01633 01634 mutex_exit(&(system->mutex)); 01635 01636 return(FALSE); 01637 } 01638 01639 /*********************************************************************** 01640 Decrements the count of pending insert buffer page merges. 
*/ 01641 01642 void 01643 fil_decr_pending_ibuf_merges( 01644 /*=========================*/ 01645 ulint id) /* in: space id */ 01646 { 01647 fil_system_t* system = fil_system; 01648 fil_space_t* space; 01649 01650 mutex_enter(&(system->mutex)); 01651 01652 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 01653 01654 if (space == NULL) { 01655 fprintf(stderr, 01656 "InnoDB: Error: decrementing ibuf merge of a dropped tablespace %lu\n", 01657 (ulong) id); 01658 } 01659 01660 if (space != NULL) { 01661 space->n_pending_ibuf_merges--; 01662 } 01663 01664 mutex_exit(&(system->mutex)); 01665 } 01666 01667 /************************************************************ 01668 Creates the database directory for a table if it does not exist yet. */ 01669 static 01670 void 01671 fil_create_directory_for_tablename( 01672 /*===============================*/ 01673 const char* name) /* in: name in the standard 01674 'databasename/tablename' format */ 01675 { 01676 const char* namend; 01677 char* path; 01678 ulint len; 01679 01680 len = strlen(fil_path_to_mysql_datadir); 01681 namend = strchr(name, '/'); 01682 ut_a(namend); 01683 path = mem_alloc(len + (namend - name) + 2); 01684 01685 memcpy(path, fil_path_to_mysql_datadir, len); 01686 path[len] = '/'; 01687 memcpy(path + len + 1, name, namend - name); 01688 path[len + (namend - name) + 1] = 0; 01689 01690 srv_normalize_path_for_win(path); 01691 01692 ut_a(os_file_create_directory(path, FALSE)); 01693 mem_free(path); 01694 } 01695 01696 #ifndef UNIV_HOTBACKUP 01697 /************************************************************ 01698 Writes a log record about an .ibd file create/rename/delete. 
*/ 01699 static 01700 void 01701 fil_op_write_log( 01702 /*=============*/ 01703 ulint type, /* in: MLOG_FILE_CREATE, 01704 MLOG_FILE_DELETE, or 01705 MLOG_FILE_RENAME */ 01706 ulint space_id, /* in: space id */ 01707 const char* name, /* in: table name in the familiar 01708 'databasename/tablename' format, or 01709 the file path in the case of 01710 MLOG_FILE_DELETE */ 01711 const char* new_name, /* in: if type is MLOG_FILE_RENAME, 01712 the new table name in the 01713 'databasename/tablename' format */ 01714 mtr_t* mtr) /* in: mini-transaction handle */ 01715 { 01716 byte* log_ptr; 01717 ulint len; 01718 01719 log_ptr = mlog_open(mtr, 11 + 2); 01720 01721 if (!log_ptr) { 01722 /* Logging in mtr is switched off during crash recovery: 01723 in that case mlog_open returns NULL */ 01724 return; 01725 } 01726 01727 log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0, 01728 log_ptr, mtr); 01729 /* Let us store the strings as null-terminated for easier readability 01730 and handling */ 01731 01732 len = strlen(name) + 1; 01733 01734 mach_write_to_2(log_ptr, len); 01735 log_ptr += 2; 01736 mlog_close(mtr, log_ptr); 01737 01738 mlog_catenate_string(mtr, (byte*) name, len); 01739 01740 if (type == MLOG_FILE_RENAME) { 01741 ulint len = strlen(new_name) + 1; 01742 log_ptr = mlog_open(mtr, 2 + len); 01743 ut_a(log_ptr); 01744 mach_write_to_2(log_ptr, len); 01745 log_ptr += 2; 01746 mlog_close(mtr, log_ptr); 01747 01748 mlog_catenate_string(mtr, (byte*) new_name, len); 01749 } 01750 } 01751 #endif 01752 01753 /*********************************************************************** 01754 Parses the body of a log record written about an .ibd file operation. That is, 01755 the log record part after the standard (type, space id, page no) header of the 01756 log record. 01757 01758 If desired, also replays the delete or rename operation if the .ibd file 01759 exists and the space id in it matches. 
Replays the create operation if a file 01760 at that path does not exist yet. If the database directory for the file to be 01761 created does not exist, then we create the directory, too. 01762 01763 Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the 01764 datadir that we should use in replaying the file operations. */ 01765 01766 byte* 01767 fil_op_log_parse_or_replay( 01768 /*=======================*/ 01769 /* out: end of log record, or NULL if the 01770 record was not completely contained between 01771 ptr and end_ptr */ 01772 byte* ptr, /* in: buffer containing the log record body, 01773 or an initial segment of it, if the record does 01774 not fir completely between ptr and end_ptr */ 01775 byte* end_ptr, /* in: buffer end */ 01776 ulint type, /* in: the type of this log record */ 01777 ibool do_replay, /* in: TRUE if we want to replay the 01778 operation, and not just parse the log record */ 01779 ulint space_id) /* in: if do_replay is TRUE, the space id of 01780 the tablespace in question; otherwise 01781 ignored */ 01782 { 01783 ulint name_len; 01784 ulint new_name_len; 01785 const char* name; 01786 const char* new_name = NULL; 01787 01788 if (end_ptr < ptr + 2) { 01789 01790 return(NULL); 01791 } 01792 01793 name_len = mach_read_from_2(ptr); 01794 01795 ptr += 2; 01796 01797 if (end_ptr < ptr + name_len) { 01798 01799 return(NULL); 01800 } 01801 01802 name = (const char*) ptr; 01803 01804 ptr += name_len; 01805 01806 if (type == MLOG_FILE_RENAME) { 01807 if (end_ptr < ptr + 2) { 01808 01809 return(NULL); 01810 } 01811 01812 new_name_len = mach_read_from_2(ptr); 01813 01814 ptr += 2; 01815 01816 if (end_ptr < ptr + new_name_len) { 01817 01818 return(NULL); 01819 } 01820 01821 new_name = (const char*) ptr; 01822 01823 ptr += new_name_len; 01824 } 01825 01826 /* We managed to parse a full log record body */ 01827 /* 01828 printf("Parsed log rec of type %lu space %lu\n" 01829 "name %s\n", type, space_id, name); 01830 01831 if (type 
== MLOG_FILE_RENAME) { 01832 printf("new name %s\n", new_name); 01833 } 01834 */ 01835 if (do_replay == FALSE) { 01836 01837 return(ptr); 01838 } 01839 01840 /* Let us try to perform the file operation, if sensible. Note that 01841 ibbackup has at this stage already read in all space id info to the 01842 fil0fil.c data structures. 01843 01844 NOTE that our algorithm is not guaranteed to work correctly if there 01845 were renames of tables during the backup. See ibbackup code for more 01846 on the problem. */ 01847 01848 if (type == MLOG_FILE_DELETE) { 01849 if (fil_tablespace_exists_in_mem(space_id)) { 01850 ut_a(fil_delete_tablespace(space_id)); 01851 } 01852 } else if (type == MLOG_FILE_RENAME) { 01853 /* We do the rename based on space id, not old file name; 01854 this should guarantee that after the log replay each .ibd file 01855 has the correct name for the latest log sequence number; the 01856 proof is left as an exercise :) */ 01857 01858 if (fil_tablespace_exists_in_mem(space_id)) { 01859 /* Create the database directory for the new name, if 01860 it does not exist yet */ 01861 fil_create_directory_for_tablename(new_name); 01862 01863 /* Rename the table if there is not yet a tablespace 01864 with the same name */ 01865 01866 if (fil_get_space_id_for_table(new_name) 01867 == ULINT_UNDEFINED) { 01868 /* We do not care of the old name, that is 01869 why we pass NULL as the first argument */ 01870 ut_a(fil_rename_tablespace(NULL, space_id, 01871 new_name)); 01872 } 01873 } 01874 } else { 01875 ut_a(type == MLOG_FILE_CREATE); 01876 01877 if (fil_tablespace_exists_in_mem(space_id)) { 01878 /* Do nothing */ 01879 } else if (fil_get_space_id_for_table(name) != 01880 ULINT_UNDEFINED) { 01881 /* Do nothing */ 01882 } else { 01883 /* Create the database directory for name, if it does 01884 not exist yet */ 01885 fil_create_directory_for_tablename(name); 01886 01887 ut_a(space_id != 0); 01888 01889 ut_a(DB_SUCCESS == 01890 fil_create_new_single_table_tablespace( 
01891 &space_id, name, FALSE, 01892 FIL_IBD_FILE_INITIAL_SIZE)); 01893 } 01894 } 01895 01896 return(ptr); 01897 } 01898 01899 /*********************************************************************** 01900 Deletes a single-table tablespace. The tablespace must be cached in the 01901 memory cache. */ 01902 01903 ibool 01904 fil_delete_tablespace( 01905 /*==================*/ 01906 /* out: TRUE if success */ 01907 ulint id) /* in: space id */ 01908 { 01909 fil_system_t* system = fil_system; 01910 ibool success; 01911 fil_space_t* space; 01912 fil_node_t* node; 01913 ulint count = 0; 01914 char* path; 01915 01916 ut_a(id != 0); 01917 stop_ibuf_merges: 01918 mutex_enter(&(system->mutex)); 01919 01920 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 01921 01922 if (space != NULL) { 01923 space->stop_ibuf_merges = TRUE; 01924 01925 if (space->n_pending_ibuf_merges == 0) { 01926 mutex_exit(&(system->mutex)); 01927 01928 count = 0; 01929 01930 goto try_again; 01931 } else { 01932 if (count > 5000) { 01933 ut_print_timestamp(stderr); 01934 fputs( 01935 " InnoDB: Warning: trying to delete tablespace ", stderr); 01936 ut_print_filename(stderr, space->name); 01937 fprintf(stderr, ",\n" 01938 "InnoDB: but there are %lu pending ibuf merges on it.\n" 01939 "InnoDB: Loop %lu.\n", (ulong) space->n_pending_ibuf_merges, 01940 (ulong) count); 01941 } 01942 01943 mutex_exit(&(system->mutex)); 01944 01945 os_thread_sleep(20000); 01946 count++; 01947 01948 goto stop_ibuf_merges; 01949 } 01950 } 01951 01952 mutex_exit(&(system->mutex)); 01953 count = 0; 01954 01955 try_again: 01956 mutex_enter(&(system->mutex)); 01957 01958 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 01959 01960 if (space == NULL) { 01961 ut_print_timestamp(stderr); 01962 fprintf(stderr, 01963 " InnoDB: Error: cannot delete tablespace %lu\n" 01964 "InnoDB: because it is not found in the tablespace memory cache.\n", 01965 (ulong) id); 01966 01967 mutex_exit(&(system->mutex)); 01968 01969 
return(FALSE); 01970 } 01971 01972 ut_a(space); 01973 ut_a(space->n_pending_ibuf_merges == 0); 01974 01975 space->is_being_deleted = TRUE; 01976 01977 ut_a(UT_LIST_GET_LEN(space->chain) == 1); 01978 node = UT_LIST_GET_FIRST(space->chain); 01979 01980 if (space->n_pending_flushes > 0 || node->n_pending > 0) { 01981 if (count > 1000) { 01982 ut_print_timestamp(stderr); 01983 fputs( 01984 " InnoDB: Warning: trying to delete tablespace ", stderr); 01985 ut_print_filename(stderr, space->name); 01986 fprintf(stderr, ",\n" 01987 "InnoDB: but there are %lu flushes and %lu pending i/o's on it\n" 01988 "InnoDB: Loop %lu.\n", (ulong) space->n_pending_flushes, 01989 (ulong) node->n_pending, 01990 (ulong) count); 01991 } 01992 mutex_exit(&(system->mutex)); 01993 os_thread_sleep(20000); 01994 01995 count++; 01996 01997 goto try_again; 01998 } 01999 02000 path = mem_strdup(space->name); 02001 02002 mutex_exit(&(system->mutex)); 02003 #ifndef UNIV_HOTBACKUP 02004 /* Invalidate in the buffer pool all pages belonging to the 02005 tablespace. Since we have set space->is_being_deleted = TRUE, readahead 02006 or ibuf merge can no longer read more pages of this tablespace to the 02007 buffer pool. Thus we can clean the tablespace out of the buffer pool 02008 completely and permanently. The flag is_being_deleted also prevents 02009 fil_flush() from being applied to this tablespace. */ 02010 02011 buf_LRU_invalidate_tablespace(id); 02012 #endif 02013 /* printf("Deleting tablespace %s id %lu\n", space->name, id); */ 02014 02015 success = fil_space_free(id); 02016 02017 if (success) { 02018 success = os_file_delete(path); 02019 02020 if (!success) { 02021 success = os_file_delete_if_exists(path); 02022 } 02023 } 02024 02025 if (success) { 02026 #ifndef UNIV_HOTBACKUP 02027 /* Write a log record about the deletion of the .ibd 02028 file, so that ibbackup can replay it in the 02029 --apply-log phase. We use a dummy mtr and the familiar 02030 log write mechanism. 
*/ 02031 mtr_t mtr; 02032 02033 /* When replaying the operation in ibbackup, do not try 02034 to write any log record */ 02035 mtr_start(&mtr); 02036 02037 fil_op_write_log(MLOG_FILE_DELETE, id, path, NULL, &mtr); 02038 mtr_commit(&mtr); 02039 #endif 02040 mem_free(path); 02041 02042 return(TRUE); 02043 } 02044 02045 mem_free(path); 02046 02047 return(FALSE); 02048 } 02049 02050 /*********************************************************************** 02051 Discards a single-table tablespace. The tablespace must be cached in the 02052 memory cache. Discarding is like deleting a tablespace, but 02053 1) we do not drop the table from the data dictionary; 02054 2) we remove all insert buffer entries for the tablespace immediately; in DROP 02055 TABLE they are only removed gradually in the background; 02056 3) when the user does IMPORT TABLESPACE, the tablespace will have the same id 02057 as it originally had. */ 02058 02059 ibool 02060 fil_discard_tablespace( 02061 /*===================*/ 02062 /* out: TRUE if success */ 02063 ulint id) /* in: space id */ 02064 { 02065 ibool success; 02066 02067 success = fil_delete_tablespace(id); 02068 02069 if (!success) { 02070 fprintf(stderr, 02071 "InnoDB: Warning: cannot delete tablespace %lu in DISCARD TABLESPACE.\n" 02072 "InnoDB: But let us remove the insert buffer entries for this tablespace.\n", 02073 (ulong) id); 02074 } 02075 02076 /* Remove all insert buffer entries for the tablespace */ 02077 02078 ibuf_delete_for_discarded_space(id); 02079 02080 return(TRUE); 02081 } 02082 02083 /*********************************************************************** 02084 Renames the memory cache structures of a single-table tablespace. 
*/ 02085 static 02086 ibool 02087 fil_rename_tablespace_in_mem( 02088 /*=========================*/ 02089 /* out: TRUE if success */ 02090 fil_space_t* space, /* in: tablespace memory object */ 02091 fil_node_t* node, /* in: file node of that tablespace */ 02092 const char* path) /* in: new name */ 02093 { 02094 fil_system_t* system = fil_system; 02095 fil_space_t* space2; 02096 const char* old_name = space->name; 02097 02098 HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(old_name), 02099 space2, 0 == strcmp(old_name, space2->name)); 02100 if (space != space2) { 02101 fputs("InnoDB: Error: cannot find ", stderr); 02102 ut_print_filename(stderr, old_name); 02103 fputs(" in tablespace memory cache\n", stderr); 02104 02105 return(FALSE); 02106 } 02107 02108 HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(path), 02109 space2, 0 == strcmp(path, space2->name)); 02110 if (space2 != NULL) { 02111 fputs("InnoDB: Error: ", stderr); 02112 ut_print_filename(stderr, path); 02113 fputs(" is already in tablespace memory cache\n", stderr); 02114 02115 return(FALSE); 02116 } 02117 02118 HASH_DELETE(fil_space_t, name_hash, system->name_hash, 02119 ut_fold_string(space->name), space); 02120 mem_free(space->name); 02121 mem_free(node->name); 02122 02123 space->name = mem_strdup(path); 02124 node->name = mem_strdup(path); 02125 02126 HASH_INSERT(fil_space_t, name_hash, system->name_hash, 02127 ut_fold_string(path), space); 02128 return(TRUE); 02129 } 02130 02131 /*********************************************************************** 02132 Allocates a file name for a single-table tablespace. The string must be freed 02133 by caller with mem_free(). 
*/ 02134 static 02135 char* 02136 fil_make_ibd_name( 02137 /*==============*/ 02138 /* out, own: file name */ 02139 const char* name, /* in: table name or a dir path of a 02140 TEMPORARY table */ 02141 ibool is_temp) /* in: TRUE if it is a dir path */ 02142 { 02143 ulint namelen = strlen(name); 02144 ulint dirlen = strlen(fil_path_to_mysql_datadir); 02145 char* filename = mem_alloc(namelen + dirlen + sizeof "/.ibd"); 02146 02147 if (is_temp) { 02148 memcpy(filename, name, namelen); 02149 memcpy(filename + namelen, ".ibd", sizeof ".ibd"); 02150 } else { 02151 memcpy(filename, fil_path_to_mysql_datadir, dirlen); 02152 filename[dirlen] = '/'; 02153 02154 memcpy(filename + dirlen + 1, name, namelen); 02155 memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd"); 02156 } 02157 02158 srv_normalize_path_for_win(filename); 02159 02160 return(filename); 02161 } 02162 02163 /*********************************************************************** 02164 Renames a single-table tablespace. The tablespace must be cached in the 02165 tablespace memory cache. 
*/ 02166 02167 ibool 02168 fil_rename_tablespace( 02169 /*==================*/ 02170 /* out: TRUE if success */ 02171 const char* old_name, /* in: old table name in the standard 02172 databasename/tablename format of 02173 InnoDB, or NULL if we do the rename 02174 based on the space id only */ 02175 ulint id, /* in: space id */ 02176 const char* new_name) /* in: new table name in the standard 02177 databasename/tablename format 02178 of InnoDB */ 02179 { 02180 fil_system_t* system = fil_system; 02181 ibool success; 02182 fil_space_t* space; 02183 fil_node_t* node; 02184 ulint count = 0; 02185 char* path; 02186 ibool old_name_was_specified = TRUE; 02187 char* old_path; 02188 02189 ut_a(id != 0); 02190 02191 if (old_name == NULL) { 02192 old_name = "(name not specified)"; 02193 old_name_was_specified = FALSE; 02194 } 02195 retry: 02196 count++; 02197 02198 if (count > 1000) { 02199 ut_print_timestamp(stderr); 02200 fputs(" InnoDB: Warning: problems renaming ", stderr); 02201 ut_print_filename(stderr, old_name); 02202 fputs(" to ", stderr); 02203 ut_print_filename(stderr, new_name); 02204 fprintf(stderr, ", %lu iterations\n", (ulong) count); 02205 } 02206 02207 mutex_enter(&(system->mutex)); 02208 02209 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 02210 02211 if (space == NULL) { 02212 fprintf(stderr, 02213 "InnoDB: Error: cannot find space id %lu from the tablespace memory cache\n" 02214 "InnoDB: though the table ", (ulong) id); 02215 ut_print_filename(stderr, old_name); 02216 fputs(" in a rename operation should have that id\n", stderr); 02217 mutex_exit(&(system->mutex)); 02218 02219 return(FALSE); 02220 } 02221 02222 if (count > 25000) { 02223 space->stop_ios = FALSE; 02224 mutex_exit(&(system->mutex)); 02225 02226 return(FALSE); 02227 } 02228 02229 /* We temporarily close the .ibd file because we do not trust that 02230 operating systems can rename an open file. 
For the closing we have to 02231 wait until there are no pending i/o's or flushes on the file. */ 02232 02233 space->stop_ios = TRUE; 02234 02235 ut_a(UT_LIST_GET_LEN(space->chain) == 1); 02236 node = UT_LIST_GET_FIRST(space->chain); 02237 02238 if (node->n_pending > 0 || node->n_pending_flushes > 0) { 02239 /* There are pending i/o's or flushes, sleep for a while and 02240 retry */ 02241 02242 mutex_exit(&(system->mutex)); 02243 02244 os_thread_sleep(20000); 02245 02246 goto retry; 02247 02248 } else if (node->modification_counter > node->flush_counter) { 02249 /* Flush the space */ 02250 02251 mutex_exit(&(system->mutex)); 02252 02253 os_thread_sleep(20000); 02254 02255 fil_flush(id); 02256 02257 goto retry; 02258 02259 } else if (node->open) { 02260 /* Close the file */ 02261 02262 fil_node_close_file(node, system); 02263 } 02264 02265 /* Check that the old name in the space is right */ 02266 02267 if (old_name_was_specified) { 02268 old_path = fil_make_ibd_name(old_name, FALSE); 02269 02270 ut_a(strcmp(space->name, old_path) == 0); 02271 ut_a(strcmp(node->name, old_path) == 0); 02272 } else { 02273 old_path = mem_strdup(space->name); 02274 } 02275 02276 /* Rename the tablespace and the node in the memory cache */ 02277 path = fil_make_ibd_name(new_name, FALSE); 02278 success = fil_rename_tablespace_in_mem(space, node, path); 02279 02280 if (success) { 02281 success = os_file_rename(old_path, path); 02282 02283 if (!success) { 02284 /* We have to revert the changes we made 02285 to the tablespace memory cache */ 02286 02287 ut_a(fil_rename_tablespace_in_mem(space, node, 02288 old_path)); 02289 } 02290 } 02291 02292 mem_free(path); 02293 mem_free(old_path); 02294 02295 space->stop_ios = FALSE; 02296 02297 mutex_exit(&(system->mutex)); 02298 02299 #ifndef UNIV_HOTBACKUP 02300 if (success) { 02301 mtr_t mtr; 02302 02303 mtr_start(&mtr); 02304 02305 fil_op_write_log(MLOG_FILE_RENAME, id, old_name, new_name, 02306 &mtr); 02307 mtr_commit(&mtr); 02308 } 02309 #endif 
02310 return(success); 02311 } 02312 02313 /*********************************************************************** 02314 Creates a new single-table tablespace to a database directory of MySQL. 02315 Database directories are under the 'datadir' of MySQL. The datadir is the 02316 directory of a running mysqld program. We can refer to it by simply the 02317 path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp 02318 dir of the mysqld server. */ 02319 02320 ulint 02321 fil_create_new_single_table_tablespace( 02322 /*===================================*/ 02323 /* out: DB_SUCCESS or error code */ 02324 ulint* space_id, /* in/out: space id; if this is != 0, 02325 then this is an input parameter, 02326 otherwise output */ 02327 const char* tablename, /* in: the table name in the usual 02328 databasename/tablename format 02329 of InnoDB, or a dir path to a temp 02330 table */ 02331 ibool is_temp, /* in: TRUE if a table created with 02332 CREATE TEMPORARY TABLE */ 02333 ulint size) /* in: the initial size of the 02334 tablespace file in pages, 02335 must be >= FIL_IBD_FILE_INITIAL_SIZE */ 02336 { 02337 os_file_t file; 02338 ibool ret; 02339 ulint err; 02340 byte* buf2; 02341 byte* page; 02342 ibool success; 02343 char* path; 02344 02345 ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); 02346 02347 path = fil_make_ibd_name(tablename, is_temp); 02348 02349 file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL, 02350 OS_DATA_FILE, &ret); 02351 if (ret == FALSE) { 02352 ut_print_timestamp(stderr); 02353 fputs(" InnoDB: Error creating file ", stderr); 02354 ut_print_filename(stderr, path); 02355 fputs(".\n", stderr); 02356 02357 /* The following call will print an error message */ 02358 02359 err = os_file_get_last_error(TRUE); 02360 02361 if (err == OS_FILE_ALREADY_EXISTS) { 02362 fputs( 02363 "InnoDB: The file already exists though the corresponding table did not\n" 02364 "InnoDB: exist in the InnoDB data dictionary. 
Have you moved InnoDB\n" 02365 "InnoDB: .ibd files around without using the SQL commands\n" 02366 "InnoDB: DISCARD TABLESPACE and IMPORT TABLESPACE, or did\n" 02367 "InnoDB: mysqld crash in the middle of CREATE TABLE? You can\n" 02368 "InnoDB: resolve the problem by removing the file ", stderr); 02369 ut_print_filename(stderr, path); 02370 fputs("\n" 02371 "InnoDB: under the 'datadir' of MySQL.\n", stderr); 02372 02373 mem_free(path); 02374 return(DB_TABLESPACE_ALREADY_EXISTS); 02375 } 02376 02377 if (err == OS_FILE_DISK_FULL) { 02378 02379 mem_free(path); 02380 return(DB_OUT_OF_FILE_SPACE); 02381 } 02382 02383 mem_free(path); 02384 return(DB_ERROR); 02385 } 02386 02387 buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); 02388 /* Align the memory for file i/o if we might have O_DIRECT set */ 02389 page = ut_align(buf2, UNIV_PAGE_SIZE); 02390 02391 ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0); 02392 02393 if (!ret) { 02394 ut_free(buf2); 02395 os_file_close(file); 02396 os_file_delete(path); 02397 02398 mem_free(path); 02399 return(DB_OUT_OF_FILE_SPACE); 02400 } 02401 02402 if (*space_id == 0) { 02403 *space_id = fil_assign_new_space_id(); 02404 } 02405 02406 /* printf("Creating tablespace %s id %lu\n", path, *space_id); */ 02407 02408 if (*space_id == ULINT_UNDEFINED) { 02409 ut_free(buf2); 02410 error_exit: 02411 os_file_close(file); 02412 error_exit2: 02413 os_file_delete(path); 02414 02415 mem_free(path); 02416 return(DB_ERROR); 02417 } 02418 02419 /* We have to write the space id to the file immediately and flush the 02420 file to disk. This is because in crash recovery we must be aware what 02421 tablespaces exist and what are their space id's, so that we can apply 02422 the log records to the right file. It may take quite a while until 02423 buffer pool flush algorithms write anything to the file and flush it to 02424 disk. 
If we would not write here anything, the file would be filled 02425 with zeros from the call of os_file_set_size(), until a buffer pool 02426 flush would write to it. */ 02427 02428 memset(page, '\0', UNIV_PAGE_SIZE); 02429 02430 fsp_header_write_space_id(page, *space_id); 02431 02432 buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0); 02433 02434 ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE); 02435 02436 ut_free(buf2); 02437 02438 if (!ret) { 02439 fputs( 02440 "InnoDB: Error: could not write the first page to tablespace ", stderr); 02441 ut_print_filename(stderr, path); 02442 putc('\n', stderr); 02443 goto error_exit; 02444 } 02445 02446 ret = os_file_flush(file); 02447 02448 if (!ret) { 02449 fputs( 02450 "InnoDB: Error: file flush of tablespace ", stderr); 02451 ut_print_filename(stderr, path); 02452 fputs(" failed\n", stderr); 02453 goto error_exit; 02454 } 02455 02456 os_file_close(file); 02457 02458 if (*space_id == ULINT_UNDEFINED) { 02459 goto error_exit2; 02460 } 02461 02462 success = fil_space_create(path, *space_id, FIL_TABLESPACE); 02463 02464 if (!success) { 02465 goto error_exit2; 02466 } 02467 02468 fil_node_create(path, size, *space_id, FALSE); 02469 02470 #ifndef UNIV_HOTBACKUP 02471 { 02472 mtr_t mtr; 02473 02474 mtr_start(&mtr); 02475 02476 fil_op_write_log(MLOG_FILE_CREATE, *space_id, tablename, NULL, &mtr); 02477 02478 mtr_commit(&mtr); 02479 } 02480 #endif 02481 mem_free(path); 02482 return(DB_SUCCESS); 02483 } 02484 02485 /************************************************************************ 02486 It is possible, though very improbable, that the lsn's in the tablespace to be 02487 imported have risen above the current system lsn, if a lengthy purge, ibuf 02488 merge, or rollback was performed on a backup taken with ibbackup. If that is 02489 the case, reset page lsn's in the file. 
We assume that mysqld was shut down 02490 after it performed these cleanup operations on the .ibd file, so that it at 02491 the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the 02492 first page of the .ibd file, and we can determine whether we need to reset the 02493 lsn's just by looking at that flush lsn. */ 02494 02495 ibool 02496 fil_reset_too_high_lsns( 02497 /*====================*/ 02498 /* out: TRUE if success */ 02499 const char* name, /* in: table name in the 02500 databasename/tablename format */ 02501 dulint current_lsn) /* in: reset lsn's if the lsn stamped 02502 to FIL_PAGE_FILE_FLUSH_LSN in the 02503 first page is too high */ 02504 { 02505 os_file_t file; 02506 char* filepath; 02507 byte* page; 02508 byte* buf2; 02509 dulint flush_lsn; 02510 ulint space_id; 02511 ib_longlong file_size; 02512 ib_longlong offset; 02513 ulint page_no; 02514 ibool success; 02515 02516 filepath = fil_make_ibd_name(name, FALSE); 02517 02518 file = os_file_create_simple_no_error_handling(filepath, OS_FILE_OPEN, 02519 OS_FILE_READ_WRITE, &success); 02520 if (!success) { 02521 /* The following call prints an error message */ 02522 os_file_get_last_error(TRUE); 02523 02524 ut_print_timestamp(stderr); 02525 02526 fputs( 02527 " InnoDB: Error: trying to open a table, but could not\n" 02528 "InnoDB: open the tablespace file ", stderr); 02529 ut_print_filename(stderr, filepath); 02530 fputs("!\n", stderr); 02531 mem_free(filepath); 02532 02533 return(FALSE); 02534 } 02535 02536 /* Read the first page of the tablespace */ 02537 02538 buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); 02539 /* Align the memory for file i/o if we might have O_DIRECT set */ 02540 page = ut_align(buf2, UNIV_PAGE_SIZE); 02541 02542 success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); 02543 if (!success) { 02544 02545 goto func_exit; 02546 } 02547 02548 /* We have to read the file flush lsn from the header of the file */ 02549 02550 flush_lsn = mach_read_from_8(page + 
FIL_PAGE_FILE_FLUSH_LSN); 02551 02552 if (ut_dulint_cmp(current_lsn, flush_lsn) >= 0) { 02553 /* Ok */ 02554 success = TRUE; 02555 02556 goto func_exit; 02557 } 02558 02559 space_id = fsp_header_get_space_id(page); 02560 02561 ut_print_timestamp(stderr); 02562 fprintf(stderr, 02563 " InnoDB: Flush lsn in the tablespace file %lu to be imported\n" 02564 "InnoDB: is %lu %lu, which exceeds current system lsn %lu %lu.\n" 02565 "InnoDB: We reset the lsn's in the file ", 02566 (ulong) space_id, 02567 (ulong) ut_dulint_get_high(flush_lsn), 02568 (ulong) ut_dulint_get_low(flush_lsn), 02569 (ulong) ut_dulint_get_high(current_lsn), 02570 (ulong) ut_dulint_get_low(current_lsn)); 02571 ut_print_filename(stderr, filepath); 02572 fputs(".\n", stderr); 02573 02574 /* Loop through all the pages in the tablespace and reset the lsn and 02575 the page checksum if necessary */ 02576 02577 file_size = os_file_get_size_as_iblonglong(file); 02578 02579 for (offset = 0; offset < file_size; offset += UNIV_PAGE_SIZE) { 02580 success = os_file_read(file, page, 02581 (ulint)(offset & 0xFFFFFFFFUL), 02582 (ulint)(offset >> 32), UNIV_PAGE_SIZE); 02583 if (!success) { 02584 02585 goto func_exit; 02586 } 02587 if (ut_dulint_cmp(mach_read_from_8(page + FIL_PAGE_LSN), 02588 current_lsn) > 0) { 02589 /* We have to reset the lsn */ 02590 space_id = mach_read_from_4(page 02591 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); 02592 page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); 02593 02594 buf_flush_init_for_writing(page, current_lsn, space_id, 02595 page_no); 02596 success = os_file_write(filepath, file, page, 02597 (ulint)(offset & 0xFFFFFFFFUL), 02598 (ulint)(offset >> 32), UNIV_PAGE_SIZE); 02599 if (!success) { 02600 02601 goto func_exit; 02602 } 02603 } 02604 } 02605 02606 success = os_file_flush(file); 02607 if (!success) { 02608 02609 goto func_exit; 02610 } 02611 02612 /* We now update the flush_lsn stamp at the start of the file */ 02613 success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); 02614 
if (!success) { 02615 02616 goto func_exit; 02617 } 02618 02619 mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn); 02620 02621 success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE); 02622 if (!success) { 02623 02624 goto func_exit; 02625 } 02626 success = os_file_flush(file); 02627 func_exit: 02628 os_file_close(file); 02629 ut_free(buf2); 02630 mem_free(filepath); 02631 02632 return(success); 02633 } 02634 02635 /************************************************************************ 02636 Tries to open a single-table tablespace and optionally checks the space id is 02637 right in it. If does not succeed, prints an error message to the .err log. This 02638 function is used to open a tablespace when we start up mysqld, and also in 02639 IMPORT TABLESPACE. 02640 NOTE that we assume this operation is used either at the database startup 02641 or under the protection of the dictionary mutex, so that two users cannot 02642 race here. This operation does not leave the file associated with the 02643 tablespace open, but closes it after we have looked at the space id in it. 
*/ 02644 02645 ibool 02646 fil_open_single_table_tablespace( 02647 /*=============================*/ 02648 /* out: TRUE if success */ 02649 ibool check_space_id, /* in: should we check that the space 02650 id in the file is right; we assume 02651 that this function runs much faster 02652 if no check is made, since accessing 02653 the file inode probably is much 02654 faster (the OS caches them) than 02655 accessing the first page of the file */ 02656 ulint id, /* in: space id */ 02657 const char* name) /* in: table name in the 02658 databasename/tablename format */ 02659 { 02660 os_file_t file; 02661 char* filepath; 02662 ibool success; 02663 byte* buf2; 02664 byte* page; 02665 ulint space_id; 02666 ibool ret = TRUE; 02667 02668 filepath = fil_make_ibd_name(name, FALSE); 02669 02670 file = os_file_create_simple_no_error_handling(filepath, OS_FILE_OPEN, 02671 OS_FILE_READ_ONLY, &success); 02672 if (!success) { 02673 /* The following call prints an error message */ 02674 os_file_get_last_error(TRUE); 02675 02676 ut_print_timestamp(stderr); 02677 02678 fputs( 02679 " InnoDB: Error: trying to open a table, but could not\n" 02680 "InnoDB: open the tablespace file ", stderr); 02681 ut_print_filename(stderr, filepath); 02682 fputs("!\n" 02683 "InnoDB: Have you moved InnoDB .ibd files around without using the\n" 02684 "InnoDB: commands DISCARD TABLESPACE and IMPORT TABLESPACE?\n" 02685 "InnoDB: It is also possible that this is a temporary table #sql...,\n" 02686 "InnoDB: and MySQL removed the .ibd file for this.\n" 02687 "InnoDB: Please refer to\n" 02688 "InnoDB:" 02689 " http://dev.mysql.com/doc/mysql/en/InnoDB_troubleshooting_datadict.html\n" 02690 "InnoDB: for how to resolve the issue.\n", stderr); 02691 02692 mem_free(filepath); 02693 02694 return(FALSE); 02695 } 02696 02697 if (!check_space_id) { 02698 space_id = id; 02699 02700 goto skip_check; 02701 } 02702 02703 /* Read the first page of the tablespace */ 02704 02705 buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); 02706 /* 
Align the memory for file i/o if we might have O_DIRECT set */ 02707 page = ut_align(buf2, UNIV_PAGE_SIZE); 02708 02709 success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); 02710 02711 /* We have to read the tablespace id from the file */ 02712 02713 space_id = fsp_header_get_space_id(page); 02714 02715 ut_free(buf2); 02716 02717 if (space_id != id) { 02718 ut_print_timestamp(stderr); 02719 02720 fputs( 02721 " InnoDB: Error: tablespace id in file ", stderr); 02722 ut_print_filename(stderr, filepath); 02723 fprintf(stderr, " is %lu, but in the InnoDB\n" 02724 "InnoDB: data dictionary it is %lu.\n" 02725 "InnoDB: Have you moved InnoDB .ibd files around without using the\n" 02726 "InnoDB: commands DISCARD TABLESPACE and IMPORT TABLESPACE?\n" 02727 "InnoDB: Please refer to\n" 02728 "InnoDB:" 02729 " http://dev.mysql.com/doc/mysql/en/InnoDB_troubleshooting_datadict.html\n" 02730 "InnoDB: for how to resolve the issue.\n", (ulong) space_id, (ulong) id); 02731 02732 ret = FALSE; 02733 02734 goto func_exit; 02735 } 02736 02737 skip_check: 02738 success = fil_space_create(filepath, space_id, FIL_TABLESPACE); 02739 02740 if (!success) { 02741 goto func_exit; 02742 } 02743 02744 /* We do not measure the size of the file, that is why we pass the 0 02745 below */ 02746 02747 fil_node_create(filepath, 0, space_id, FALSE); 02748 func_exit: 02749 os_file_close(file); 02750 mem_free(filepath); 02751 02752 return(ret); 02753 } 02754 02755 #ifdef UNIV_HOTBACKUP 02756 /*********************************************************************** 02757 Allocates a file name for an old version of a single-table tablespace. 02758 The string must be freed by caller with mem_free()! 
*/ 02759 static 02760 char* 02761 fil_make_ibbackup_old_name( 02762 /*=======================*/ 02763 /* out, own: file name */ 02764 const char* name) /* in: original file name */ 02765 { 02766 static const char suffix[] = "_ibbackup_old_vers_"; 02767 ulint len = strlen(name); 02768 char* path = mem_alloc(len + (15 + sizeof suffix)); 02769 02770 memcpy(path, name, len); 02771 memcpy(path + len, suffix, (sizeof suffix) - 1); 02772 ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix); 02773 return(path); 02774 } 02775 #endif /* UNIV_HOTBACKUP */ 02776 02777 /************************************************************************ 02778 Opens an .ibd file and adds the associated single-table tablespace to the 02779 InnoDB fil0fil.c data structures. */ 02780 static 02781 void 02782 fil_load_single_table_tablespace( 02783 /*=============================*/ 02784 const char* dbname, /* in: database name */ 02785 const char* filename) /* in: file name (not a path), 02786 including the .ibd extension */ 02787 { 02788 os_file_t file; 02789 char* filepath; 02790 ibool success; 02791 byte* buf2; 02792 byte* page; 02793 ulint space_id; 02794 ulint size_low; 02795 ulint size_high; 02796 ib_longlong size; 02797 #ifdef UNIV_HOTBACKUP 02798 fil_space_t* space; 02799 #endif 02800 filepath = mem_alloc(strlen(dbname) + strlen(filename) 02801 + strlen(fil_path_to_mysql_datadir) + 3); 02802 02803 sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname, 02804 filename); 02805 srv_normalize_path_for_win(filepath); 02806 #ifdef __WIN__ 02807 # ifndef UNIV_HOTBACKUP 02808 /* If lower_case_table_names is 0 or 2, then MySQL allows database 02809 directory names with upper case letters. On Windows, all table and 02810 database names in InnoDB are internally always in lower case. Put the 02811 file path to lower case, so that we are consistent with InnoDB's 02812 internal data dictionary. 
*/ 02813 02814 dict_casedn_str(filepath); 02815 # endif /* !UNIV_HOTBACKUP */ 02816 #endif 02817 file = os_file_create_simple_no_error_handling(filepath, OS_FILE_OPEN, 02818 OS_FILE_READ_ONLY, &success); 02819 if (!success) { 02820 /* The following call prints an error message */ 02821 os_file_get_last_error(TRUE); 02822 02823 fprintf(stderr, 02824 "InnoDB: Error: could not open single-table tablespace file\n" 02825 "InnoDB: %s!\n" 02826 "InnoDB: We do not continue the crash recovery, because the table may become\n" 02827 "InnoDB: corrupt if we cannot apply the log records in the InnoDB log to it.\n" 02828 "InnoDB: To fix the problem and start mysqld:\n" 02829 "InnoDB: 1) If there is a permission problem in the file and mysqld cannot\n" 02830 "InnoDB: open the file, you should modify the permissions.\n" 02831 "InnoDB: 2) If the table is not needed, or you can restore it from a backup,\n" 02832 "InnoDB: then you can remove the .ibd file, and InnoDB will do a normal\n" 02833 "InnoDB: crash recovery and ignore that table.\n" 02834 "InnoDB: 3) If the file system or the disk is broken, and you cannot remove\n" 02835 "InnoDB: the .ibd file, you can set innodb_force_recovery > 0 in my.cnf\n" 02836 "InnoDB: and force InnoDB to continue crash recovery here.\n", filepath); 02837 02838 mem_free(filepath); 02839 02840 if (srv_force_recovery > 0) { 02841 fprintf(stderr, 02842 "InnoDB: innodb_force_recovery was set to %lu. 
Continuing crash recovery\n" 02843 "InnoDB: even though we cannot access the .ibd file of this table.\n", 02844 srv_force_recovery); 02845 return; 02846 } 02847 02848 exit(1); 02849 } 02850 02851 success = os_file_get_size(file, &size_low, &size_high); 02852 02853 if (!success) { 02854 /* The following call prints an error message */ 02855 os_file_get_last_error(TRUE); 02856 02857 fprintf(stderr, 02858 "InnoDB: Error: could not measure the size of single-table tablespace file\n" 02859 "InnoDB: %s!\n" 02860 "InnoDB: We do not continue crash recovery, because the table will become\n" 02861 "InnoDB: corrupt if we cannot apply the log records in the InnoDB log to it.\n" 02862 "InnoDB: To fix the problem and start mysqld:\n" 02863 "InnoDB: 1) If there is a permission problem in the file and mysqld cannot\n" 02864 "InnoDB: access the file, you should modify the permissions.\n" 02865 "InnoDB: 2) If the table is not needed, or you can restore it from a backup,\n" 02866 "InnoDB: then you can remove the .ibd file, and InnoDB will do a normal\n" 02867 "InnoDB: crash recovery and ignore that table.\n" 02868 "InnoDB: 3) If the file system or the disk is broken, and you cannot remove\n" 02869 "InnoDB: the .ibd file, you can set innodb_force_recovery > 0 in my.cnf\n" 02870 "InnoDB: and force InnoDB to continue crash recovery here.\n", filepath); 02871 02872 os_file_close(file); 02873 mem_free(filepath); 02874 02875 if (srv_force_recovery > 0) { 02876 fprintf(stderr, 02877 "InnoDB: innodb_force_recovery was set to %lu. Continuing crash recovery\n" 02878 "InnoDB: even though we cannot access the .ibd file of this table.\n", 02879 srv_force_recovery); 02880 return; 02881 } 02882 02883 exit(1); 02884 } 02885 02886 /* TODO: What to do in other cases where we cannot access an .ibd 02887 file during a crash recovery? */ 02888 02889 /* Every .ibd file is created >= 4 pages in size. Smaller files 02890 cannot be ok. 
*/ 02891 02892 size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low; 02893 #ifndef UNIV_HOTBACKUP 02894 if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { 02895 fprintf(stderr, 02896 "InnoDB: Error: the size of single-table tablespace file %s\n" 02897 "InnoDB: is only %lu %lu, should be at least %lu!", filepath, 02898 (ulong) size_high, 02899 (ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE)); 02900 os_file_close(file); 02901 mem_free(filepath); 02902 02903 return; 02904 } 02905 #endif 02906 /* Read the first page of the tablespace if the size big enough */ 02907 02908 buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); 02909 /* Align the memory for file i/o if we might have O_DIRECT set */ 02910 page = ut_align(buf2, UNIV_PAGE_SIZE); 02911 02912 if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { 02913 success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); 02914 02915 /* We have to read the tablespace id from the file */ 02916 02917 space_id = fsp_header_get_space_id(page); 02918 } else { 02919 space_id = ULINT_UNDEFINED; 02920 } 02921 02922 #ifndef UNIV_HOTBACKUP 02923 if (space_id == ULINT_UNDEFINED || space_id == 0) { 02924 fprintf(stderr, 02925 "InnoDB: Error: tablespace id %lu in file %s is not sensible\n", 02926 (ulong) space_id, 02927 filepath); 02928 goto func_exit; 02929 } 02930 #else 02931 if (space_id == ULINT_UNDEFINED || space_id == 0) { 02932 char* new_path; 02933 02934 fprintf(stderr, 02935 "InnoDB: Renaming tablespace %s of id %lu,\n" 02936 "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n" 02937 "InnoDB: because its size %lld is too small (< 4 pages 16 kB each),\n" 02938 "InnoDB: or the space id in the file header is not sensible.\n" 02939 "InnoDB: This can happen in an ibbackup run, and is not dangerous.\n", 02940 filepath, space_id, filepath, size); 02941 os_file_close(file); 02942 02943 new_path = fil_make_ibbackup_old_name(filepath); 02944 ut_a(os_file_rename(filepath, new_path)); 02945 02946 ut_free(buf2); 02947 mem_free(filepath); 
02948 mem_free(new_path); 02949 02950 return; 02951 } 02952 02953 /* A backup may contain the same space several times, if the space got 02954 renamed at a sensitive time. Since it is enough to have one version of 02955 the space, we rename the file if a space with the same space id 02956 already exists in the tablespace memory cache. We rather rename the 02957 file than delete it, because if there is a bug, we do not want to 02958 destroy valuable data. */ 02959 02960 mutex_enter(&(fil_system->mutex)); 02961 02962 space = fil_get_space_for_id_low(space_id); 02963 02964 if (space) { 02965 char* new_path; 02966 02967 fprintf(stderr, 02968 "InnoDB: Renaming tablespace %s of id %lu,\n" 02969 "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n" 02970 "InnoDB: because space %s with the same id\n" 02971 "InnoDB: was scanned earlier. This can happen if you have renamed tables\n" 02972 "InnoDB: during an ibbackup run.\n", filepath, space_id, filepath, 02973 space->name); 02974 os_file_close(file); 02975 02976 new_path = fil_make_ibbackup_old_name(filepath); 02977 02978 mutex_exit(&(fil_system->mutex)); 02979 02980 ut_a(os_file_rename(filepath, new_path)); 02981 02982 ut_free(buf2); 02983 mem_free(filepath); 02984 mem_free(new_path); 02985 02986 return; 02987 } 02988 mutex_exit(&(fil_system->mutex)); 02989 #endif 02990 success = fil_space_create(filepath, space_id, FIL_TABLESPACE); 02991 02992 if (!success) { 02993 02994 goto func_exit; 02995 } 02996 02997 /* We do not use the size information we have about the file, because 02998 the rounding formula for extents and pages is somewhat complex; we 02999 let fil_node_open() do that task. */ 03000 03001 fil_node_create(filepath, 0, space_id, FALSE); 03002 func_exit: 03003 os_file_close(file); 03004 ut_free(buf2); 03005 mem_free(filepath); 03006 } 03007 03008 /*************************************************************************** 03009 A fault-tolerant function that tries to read the next file name in the 03010 directory. 
We retry 100 times if os_file_readdir_next_file() returns -1. The 03011 idea is to read as much good data as we can and jump over bad data. */ 03012 static 03013 int 03014 fil_file_readdir_next_file( 03015 /*=======================*/ 03016 /* out: 0 if ok, -1 if error even after the 03017 retries, 1 if at the end of the directory */ 03018 ulint* err, /* out: this is set to DB_ERROR if an error 03019 was encountered, otherwise not changed */ 03020 const char* dirname,/* in: directory name or path */ 03021 os_file_dir_t dir, /* in: directory stream */ 03022 os_file_stat_t* info) /* in/out: buffer where the info is returned */ 03023 { 03024 ulint i; 03025 int ret; 03026 03027 for (i = 0; i < 100; i++) { 03028 ret = os_file_readdir_next_file(dirname, dir, info); 03029 03030 if (ret != -1) { 03031 03032 return(ret); 03033 } 03034 03035 fprintf(stderr, 03036 "InnoDB: Error: os_file_readdir_next_file() returned -1 in\n" 03037 "InnoDB: directory %s\n" 03038 "InnoDB: Crash recovery may have failed for some .ibd files!\n", dirname); 03039 03040 *err = DB_ERROR; 03041 } 03042 03043 return(-1); 03044 } 03045 03046 /************************************************************************ 03047 At the server startup, if we need crash recovery, scans the database 03048 directories under the MySQL datadir, looking for .ibd files. Those files are 03049 single-table tablespaces. We need to know the space id in each of them so that 03050 we know into which file we should look to check the contents of a page stored 03051 in the doublewrite buffer, also to know where to apply log records where the 03052 space id is != 0. 
*/ 03053 03054 ulint 03055 fil_load_single_table_tablespaces(void) 03056 /*===================================*/ 03057 /* out: DB_SUCCESS or error number */ 03058 { 03059 int ret; 03060 char* dbpath = NULL; 03061 ulint dbpath_len = 100; 03062 os_file_dir_t dir; 03063 os_file_dir_t dbdir; 03064 os_file_stat_t dbinfo; 03065 os_file_stat_t fileinfo; 03066 ulint err = DB_SUCCESS; 03067 03068 /* The datadir of MySQL is always the default directory of mysqld */ 03069 03070 dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE); 03071 03072 if (dir == NULL) { 03073 03074 return(DB_ERROR); 03075 } 03076 03077 dbpath = mem_alloc(dbpath_len); 03078 03079 /* Scan all directories under the datadir. They are the database 03080 directories of MySQL. */ 03081 03082 ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir, 03083 &dbinfo); 03084 while (ret == 0) { 03085 ulint len; 03086 /* printf("Looking at %s in datadir\n", dbinfo.name); */ 03087 03088 if (dbinfo.type == OS_FILE_TYPE_FILE 03089 || dbinfo.type == OS_FILE_TYPE_UNKNOWN) { 03090 03091 goto next_datadir_item; 03092 } 03093 03094 /* We found a symlink or a directory; try opening it to see 03095 if a symlink is a directory */ 03096 03097 len = strlen(fil_path_to_mysql_datadir) 03098 + strlen (dbinfo.name) + 2; 03099 if (len > dbpath_len) { 03100 dbpath_len = len; 03101 03102 if (dbpath) { 03103 mem_free(dbpath); 03104 } 03105 03106 dbpath = mem_alloc(dbpath_len); 03107 } 03108 sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir, 03109 dbinfo.name); 03110 srv_normalize_path_for_win(dbpath); 03111 03112 dbdir = os_file_opendir(dbpath, FALSE); 03113 03114 if (dbdir != NULL) { 03115 /* printf("Opened dir %s\n", dbinfo.name); */ 03116 03117 /* We found a database directory; loop through it, 03118 looking for possible .ibd files in it */ 03119 03120 ret = fil_file_readdir_next_file(&err, dbpath, dbdir, 03121 &fileinfo); 03122 while (ret == 0) { 03123 /* printf( 03124 " Looking at file %s\n", fileinfo.name); */ 
03125 03126 if (fileinfo.type == OS_FILE_TYPE_DIR) { 03127 03128 goto next_file_item; 03129 } 03130 03131 /* We found a symlink or a file */ 03132 if (strlen(fileinfo.name) > 4 03133 && 0 == strcmp(fileinfo.name + 03134 strlen(fileinfo.name) - 4, 03135 ".ibd")) { 03136 /* The name ends in .ibd; try opening 03137 the file */ 03138 fil_load_single_table_tablespace( 03139 dbinfo.name, fileinfo.name); 03140 } 03141 next_file_item: 03142 ret = fil_file_readdir_next_file(&err, 03143 dbpath, dbdir, 03144 &fileinfo); 03145 } 03146 03147 if (0 != os_file_closedir(dbdir)) { 03148 fputs( 03149 "InnoDB: Warning: could not close database directory ", stderr); 03150 ut_print_filename(stderr, dbpath); 03151 putc('\n', stderr); 03152 03153 err = DB_ERROR; 03154 } 03155 } 03156 03157 next_datadir_item: 03158 ret = fil_file_readdir_next_file(&err, 03159 fil_path_to_mysql_datadir, 03160 dir, &dbinfo); 03161 } 03162 03163 mem_free(dbpath); 03164 03165 if (0 != os_file_closedir(dir)) { 03166 fprintf(stderr, 03167 "InnoDB: Error: could not close MySQL datadir\n"); 03168 03169 return(DB_ERROR); 03170 } 03171 03172 return(err); 03173 } 03174 03175 /************************************************************************ 03176 If we need crash recovery, and we have called 03177 fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), 03178 we can call this function to print an error message of orphaned .ibd files 03179 for which there is not a data dictionary entry with a matching table name 03180 and space id. 
*/ 03181 03182 void 03183 fil_print_orphaned_tablespaces(void) 03184 /*================================*/ 03185 { 03186 fil_system_t* system = fil_system; 03187 fil_space_t* space; 03188 03189 mutex_enter(&(system->mutex)); 03190 03191 space = UT_LIST_GET_FIRST(system->space_list); 03192 03193 while (space) { 03194 if (space->purpose == FIL_TABLESPACE && space->id != 0 03195 && !space->mark) { 03196 fputs("InnoDB: Warning: tablespace ", stderr); 03197 ut_print_filename(stderr, space->name); 03198 fprintf(stderr, " of id %lu has no matching table in\n" 03199 "InnoDB: the InnoDB data dictionary.\n", (ulong) space->id); 03200 } 03201 03202 space = UT_LIST_GET_NEXT(space_list, space); 03203 } 03204 03205 mutex_exit(&(system->mutex)); 03206 } 03207 03208 /*********************************************************************** 03209 Returns TRUE if a single-table tablespace does not exist in the memory cache, 03210 or is being deleted there. */ 03211 03212 ibool 03213 fil_tablespace_deleted_or_being_deleted_in_mem( 03214 /*===========================================*/ 03215 /* out: TRUE if does not exist or is being\ 03216 deleted */ 03217 ulint id, /* in: space id */ 03218 ib_longlong version)/* in: tablespace_version should be this; if 03219 you pass -1 as the value of this, then this 03220 parameter is ignored */ 03221 { 03222 fil_system_t* system = fil_system; 03223 fil_space_t* space; 03224 03225 ut_ad(system); 03226 03227 mutex_enter(&(system->mutex)); 03228 03229 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 03230 03231 if (space == NULL || space->is_being_deleted) { 03232 mutex_exit(&(system->mutex)); 03233 03234 return(TRUE); 03235 } 03236 03237 if (version != ((ib_longlong)-1) 03238 && space->tablespace_version != version) { 03239 mutex_exit(&(system->mutex)); 03240 03241 return(TRUE); 03242 } 03243 03244 mutex_exit(&(system->mutex)); 03245 03246 return(FALSE); 03247 } 03248 03249 
/*********************************************************************** 03250 Returns TRUE if a single-table tablespace exists in the memory cache. */ 03251 03252 ibool 03253 fil_tablespace_exists_in_mem( 03254 /*=========================*/ 03255 /* out: TRUE if exists */ 03256 ulint id) /* in: space id */ 03257 { 03258 fil_system_t* system = fil_system; 03259 fil_space_t* space; 03260 03261 ut_ad(system); 03262 03263 mutex_enter(&(system->mutex)); 03264 03265 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 03266 03267 if (space == NULL) { 03268 mutex_exit(&(system->mutex)); 03269 03270 return(FALSE); 03271 } 03272 03273 mutex_exit(&(system->mutex)); 03274 03275 return(TRUE); 03276 } 03277 03278 /*********************************************************************** 03279 Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory 03280 cache. Note that if we have not done a crash recovery at the database startup, 03281 there may be many tablespaces which are not yet in the memory cache. 
*/ 03282 03283 ibool 03284 fil_space_for_table_exists_in_mem( 03285 /*==============================*/ 03286 /* out: TRUE if a matching tablespace 03287 exists in the memory cache */ 03288 ulint id, /* in: space id */ 03289 const char* name, /* in: table name in the standard 03290 'databasename/tablename' format or 03291 the dir path to a temp table */ 03292 ibool is_temp, /* in: TRUE if created with CREATE 03293 TEMPORARY TABLE */ 03294 ibool mark_space, /* in: in crash recovery, at database 03295 startup we mark all spaces which have 03296 an associated table in the InnoDB 03297 data dictionary, so that 03298 we can print a warning about orphaned 03299 tablespaces */ 03300 ibool print_error_if_does_not_exist) 03301 /* in: print detailed error 03302 information to the .err log if a 03303 matching tablespace is not found from 03304 memory */ 03305 { 03306 fil_system_t* system = fil_system; 03307 fil_space_t* namespace; 03308 fil_space_t* space; 03309 char* path; 03310 03311 ut_ad(system); 03312 03313 mutex_enter(&(system->mutex)); 03314 03315 path = fil_make_ibd_name(name, is_temp); 03316 03317 /* Look if there is a space with the same id */ 03318 03319 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 03320 03321 /* Look if there is a space with the same name; the name is the 03322 directory path from the datadir to the file */ 03323 03324 HASH_SEARCH(name_hash, system->name_hash, 03325 ut_fold_string(path), namespace, 03326 0 == strcmp(namespace->name, path)); 03327 if (space && space == namespace) { 03328 /* Found */ 03329 03330 if (mark_space) { 03331 space->mark = TRUE; 03332 } 03333 03334 mem_free(path); 03335 mutex_exit(&(system->mutex)); 03336 03337 return(TRUE); 03338 } 03339 03340 if (!print_error_if_does_not_exist) { 03341 03342 mem_free(path); 03343 mutex_exit(&(system->mutex)); 03344 03345 return(FALSE); 03346 } 03347 03348 if (space == NULL) { 03349 if (namespace == NULL) { 03350 ut_print_timestamp(stderr); 03351 fputs(" InnoDB: Error: 
table ", stderr); 03352 ut_print_filename(stderr, name); 03353 fprintf(stderr, "\n" 03354 "InnoDB: in InnoDB data dictionary has tablespace id %lu,\n" 03355 "InnoDB: but tablespace with that id or name does not exist. Have\n" 03356 "InnoDB: you deleted or moved .ibd files?\n" 03357 "InnoDB: This may also be a table created with CREATE TEMPORARY TABLE\n" 03358 "InnoDB: whose .ibd and .frm files MySQL automatically removed, but the\n" 03359 "InnoDB: table still exists in the InnoDB internal data dictionary.\n", 03360 (ulong) id); 03361 } else { 03362 ut_print_timestamp(stderr); 03363 fputs(" InnoDB: Error: table ", stderr); 03364 ut_print_filename(stderr, name); 03365 fprintf(stderr, "\n" 03366 "InnoDB: in InnoDB data dictionary has tablespace id %lu,\n" 03367 "InnoDB: but a tablespace with that id does not exist. There is\n" 03368 "InnoDB: a tablespace of name %s and id %lu, though. Have\n" 03369 "InnoDB: you deleted or moved .ibd files?\n", 03370 (ulong) id, namespace->name, 03371 (ulong) namespace->id); 03372 } 03373 error_exit: 03374 fputs( 03375 "InnoDB: Please refer to\n" 03376 "InnoDB:" 03377 " http://dev.mysql.com/doc/mysql/en/InnoDB_troubleshooting_datadict.html\n" 03378 "InnoDB: for how to resolve the issue.\n", stderr); 03379 03380 mem_free(path); 03381 mutex_exit(&(system->mutex)); 03382 03383 return(FALSE); 03384 } 03385 03386 if (0 != strcmp(space->name, path)) { 03387 ut_print_timestamp(stderr); 03388 fputs(" InnoDB: Error: table ", stderr); 03389 ut_print_filename(stderr, name); 03390 fprintf(stderr, "\n" 03391 "InnoDB: in InnoDB data dictionary has tablespace id %lu,\n" 03392 "InnoDB: but the tablespace with that id has name %s.\n" 03393 "InnoDB: Have you deleted or moved .ibd files?\n", (ulong) id, space->name); 03394 03395 if (namespace != NULL) { 03396 fputs( 03397 "InnoDB: There is a tablespace with the right name\n" 03398 "InnoDB: ", stderr); 03399 ut_print_filename(stderr, namespace->name); 03400 fprintf(stderr, ", but its id is %lu.\n", 03401 
(ulong) namespace->id); 03402 } 03403 03404 goto error_exit; 03405 } 03406 03407 mem_free(path); 03408 mutex_exit(&(system->mutex)); 03409 03410 return(FALSE); 03411 } 03412 03413 /*********************************************************************** 03414 Checks if a single-table tablespace for a given table name exists in the 03415 tablespace memory cache. */ 03416 static 03417 ulint 03418 fil_get_space_id_for_table( 03419 /*=======================*/ 03420 /* out: space id, ULINT_UNDEFINED if not 03421 found */ 03422 const char* name) /* in: table name in the standard 03423 'databasename/tablename' format */ 03424 { 03425 fil_system_t* system = fil_system; 03426 fil_space_t* namespace; 03427 ulint id = ULINT_UNDEFINED; 03428 char* path; 03429 03430 ut_ad(system); 03431 03432 mutex_enter(&(system->mutex)); 03433 03434 path = fil_make_ibd_name(name, FALSE); 03435 03436 /* Look if there is a space with the same name; the name is the 03437 directory path to the file */ 03438 03439 HASH_SEARCH(name_hash, system->name_hash, 03440 ut_fold_string(path), namespace, 03441 0 == strcmp(namespace->name, path)); 03442 if (namespace) { 03443 id = namespace->id; 03444 } 03445 03446 mem_free(path); 03447 03448 mutex_exit(&(system->mutex)); 03449 03450 return(id); 03451 } 03452 03453 /************************************************************************** 03454 Tries to extend a data file so that it would accommodate the number of pages 03455 given. The tablespace must be cached in the memory cache. If the space is big 03456 enough already, does nothing. 
*/ 03457 03458 ibool 03459 fil_extend_space_to_desired_size( 03460 /*=============================*/ 03461 /* out: TRUE if success */ 03462 ulint* actual_size, /* out: size of the space after extension; 03463 if we ran out of disk space this may be lower 03464 than the desired size */ 03465 ulint space_id, /* in: space id */ 03466 ulint size_after_extend)/* in: desired size in pages after the 03467 extension; if the current space size is bigger 03468 than this already, the function does nothing */ 03469 { 03470 fil_system_t* system = fil_system; 03471 fil_node_t* node; 03472 fil_space_t* space; 03473 byte* buf2; 03474 byte* buf; 03475 ulint buf_size; 03476 ulint start_page_no; 03477 ulint file_start_page_no; 03478 ulint offset_high; 03479 ulint offset_low; 03480 ibool success = TRUE; 03481 03482 fil_mutex_enter_and_prepare_for_io(space_id); 03483 03484 HASH_SEARCH(hash, system->spaces, space_id, space, 03485 space->id == space_id); 03486 ut_a(space); 03487 03488 if (space->size >= size_after_extend) { 03489 /* Space already big enough */ 03490 03491 *actual_size = space->size; 03492 03493 mutex_exit(&(system->mutex)); 03494 03495 return(TRUE); 03496 } 03497 03498 node = UT_LIST_GET_LAST(space->chain); 03499 03500 fil_node_prepare_for_io(node, system, space); 03501 03502 start_page_no = space->size; 03503 file_start_page_no = space->size - node->size; 03504 03505 /* Extend at most 64 pages at a time */ 03506 buf_size = ut_min(64, size_after_extend - start_page_no) 03507 * UNIV_PAGE_SIZE; 03508 buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE); 03509 buf = ut_align(buf2, UNIV_PAGE_SIZE); 03510 03511 memset(buf, 0, buf_size); 03512 03513 while (start_page_no < size_after_extend) { 03514 ulint n_pages = ut_min(buf_size / UNIV_PAGE_SIZE, 03515 size_after_extend - start_page_no); 03516 03517 offset_high = (start_page_no - file_start_page_no) 03518 / (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)); 03519 offset_low = ((start_page_no - file_start_page_no) 03520 % (4096 * ((1024 * 
1024) / UNIV_PAGE_SIZE))) 03521 * UNIV_PAGE_SIZE; 03522 #ifdef UNIV_HOTBACKUP 03523 success = os_file_write(node->name, node->handle, buf, 03524 offset_low, offset_high, 03525 UNIV_PAGE_SIZE * n_pages); 03526 #else 03527 success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, 03528 node->name, node->handle, buf, 03529 offset_low, offset_high, 03530 UNIV_PAGE_SIZE * n_pages, 03531 NULL, NULL); 03532 #endif 03533 if (success) { 03534 node->size += n_pages; 03535 space->size += n_pages; 03536 03537 os_has_said_disk_full = FALSE; 03538 } else { 03539 /* Let us measure the size of the file to determine 03540 how much we were able to extend it */ 03541 03542 n_pages = ((ulint) 03543 (os_file_get_size_as_iblonglong(node->handle) 03544 / UNIV_PAGE_SIZE)) - node->size; 03545 03546 node->size += n_pages; 03547 space->size += n_pages; 03548 03549 break; 03550 } 03551 03552 start_page_no += n_pages; 03553 } 03554 03555 mem_free(buf2); 03556 03557 fil_node_complete_io(node, system, OS_FILE_WRITE); 03558 03559 *actual_size = space->size; 03560 03561 #ifndef UNIV_HOTBACKUP 03562 if (space_id == 0) { 03563 ulint pages_per_mb = (1024 * 1024) / UNIV_PAGE_SIZE; 03564 03565 /* Keep the last data file size info up to date, rounded to 03566 full megabytes */ 03567 03568 srv_data_file_sizes[srv_n_data_files - 1] = 03569 (node->size / pages_per_mb) * pages_per_mb; 03570 } 03571 #endif /* !UNIV_HOTBACKUP */ 03572 03573 /* 03574 printf("Extended %s to %lu, actual size %lu pages\n", space->name, 03575 size_after_extend, *actual_size); */ 03576 mutex_exit(&(system->mutex)); 03577 03578 fil_flush(space_id); 03579 03580 return(success); 03581 } 03582 03583 #ifdef UNIV_HOTBACKUP 03584 /************************************************************************ 03585 Extends all tablespaces to the size stored in the space header. 
During the 03586 ibbackup --apply-log phase we extended the spaces on-demand so that log records 03587 could be applied, but that may have left spaces still too small compared to 03588 the size stored in the space header. */ 03589 03590 void 03591 fil_extend_tablespaces_to_stored_len(void) 03592 /*======================================*/ 03593 { 03594 fil_system_t* system = fil_system; 03595 fil_space_t* space; 03596 byte* buf; 03597 ulint actual_size; 03598 ulint size_in_header; 03599 ulint error; 03600 ibool success; 03601 03602 buf = mem_alloc(UNIV_PAGE_SIZE); 03603 03604 mutex_enter(&(system->mutex)); 03605 03606 space = UT_LIST_GET_FIRST(system->space_list); 03607 03608 while (space) { 03609 ut_a(space->purpose == FIL_TABLESPACE); 03610 03611 mutex_exit(&(system->mutex)); /* no need to protect with a 03612 mutex, because this is a 03613 single-threaded operation */ 03614 error = fil_read(TRUE, space->id, 0, 0, UNIV_PAGE_SIZE, buf, 03615 NULL); 03616 ut_a(error == DB_SUCCESS); 03617 03618 size_in_header = fsp_get_size_low(buf); 03619 03620 success = fil_extend_space_to_desired_size(&actual_size, 03621 space->id, size_in_header); 03622 if (!success) { 03623 fprintf(stderr, 03624 "InnoDB: Error: could not extend the tablespace of %s\n" 03625 "InnoDB: to the size stored in header, %lu pages;\n" 03626 "InnoDB: size after extension %lu pages\n" 03627 "InnoDB: Check that you have free disk space and retry!\n", space->name, 03628 size_in_header, actual_size); 03629 exit(1); 03630 } 03631 03632 mutex_enter(&(system->mutex)); 03633 03634 space = UT_LIST_GET_NEXT(space_list, space); 03635 } 03636 03637 mutex_exit(&(system->mutex)); 03638 03639 mem_free(buf); 03640 } 03641 #endif 03642 03643 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/ 03644 03645 /*********************************************************************** 03646 Tries to reserve free extents in a file space. 
*/ 03647 03648 ibool 03649 fil_space_reserve_free_extents( 03650 /*===========================*/ 03651 /* out: TRUE if succeed */ 03652 ulint id, /* in: space id */ 03653 ulint n_free_now, /* in: number of free extents now */ 03654 ulint n_to_reserve) /* in: how many one wants to reserve */ 03655 { 03656 fil_system_t* system = fil_system; 03657 fil_space_t* space; 03658 ibool success; 03659 03660 ut_ad(system); 03661 03662 mutex_enter(&(system->mutex)); 03663 03664 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 03665 03666 ut_a(space); 03667 03668 if (space->n_reserved_extents + n_to_reserve > n_free_now) { 03669 success = FALSE; 03670 } else { 03671 space->n_reserved_extents += n_to_reserve; 03672 success = TRUE; 03673 } 03674 03675 mutex_exit(&(system->mutex)); 03676 03677 return(success); 03678 } 03679 03680 /*********************************************************************** 03681 Releases free extents in a file space. */ 03682 03683 void 03684 fil_space_release_free_extents( 03685 /*===========================*/ 03686 ulint id, /* in: space id */ 03687 ulint n_reserved) /* in: how many one reserved */ 03688 { 03689 fil_system_t* system = fil_system; 03690 fil_space_t* space; 03691 03692 ut_ad(system); 03693 03694 mutex_enter(&(system->mutex)); 03695 03696 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 03697 03698 ut_a(space); 03699 ut_a(space->n_reserved_extents >= n_reserved); 03700 03701 space->n_reserved_extents -= n_reserved; 03702 03703 mutex_exit(&(system->mutex)); 03704 } 03705 03706 /*********************************************************************** 03707 Gets the number of reserved extents. If the database is silent, this number 03708 should be zero. 
*/ 03709 03710 ulint 03711 fil_space_get_n_reserved_extents( 03712 /*=============================*/ 03713 ulint id) /* in: space id */ 03714 { 03715 fil_system_t* system = fil_system; 03716 fil_space_t* space; 03717 ulint n; 03718 03719 ut_ad(system); 03720 03721 mutex_enter(&(system->mutex)); 03722 03723 HASH_SEARCH(hash, system->spaces, id, space, space->id == id); 03724 03725 ut_a(space); 03726 03727 n = space->n_reserved_extents; 03728 03729 mutex_exit(&(system->mutex)); 03730 03731 return(n); 03732 } 03733 03734 /*============================ FILE I/O ================================*/ 03735 03736 /************************************************************************ 03737 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! 03738 03739 Prepares a file node for i/o. Opens the file if it is closed. Updates the 03740 pending i/o's field in the node and the system appropriately. Takes the node 03741 off the LRU list if it is in the LRU list. The caller must hold the fil_sys 03742 mutex. 
*/ 03743 static 03744 void 03745 fil_node_prepare_for_io( 03746 /*====================*/ 03747 fil_node_t* node, /* in: file node */ 03748 fil_system_t* system, /* in: tablespace memory cache */ 03749 fil_space_t* space) /* in: space */ 03750 { 03751 ut_ad(node && system && space); 03752 #ifdef UNIV_SYNC_DEBUG 03753 ut_ad(mutex_own(&(system->mutex))); 03754 #endif /* UNIV_SYNC_DEBUG */ 03755 03756 if (system->n_open > system->max_n_open + 5) { 03757 ut_print_timestamp(stderr); 03758 fprintf(stderr, 03759 " InnoDB: Warning: open files %lu exceeds the limit %lu\n", 03760 (ulong) system->n_open, 03761 (ulong) system->max_n_open); 03762 } 03763 03764 if (node->open == FALSE) { 03765 /* File is closed: open it */ 03766 ut_a(node->n_pending == 0); 03767 03768 fil_node_open_file(node, system, space); 03769 } 03770 03771 if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE 03772 && space->id != 0) { 03773 /* The node is in the LRU list, remove it */ 03774 03775 ut_a(UT_LIST_GET_LEN(system->LRU) > 0); 03776 03777 UT_LIST_REMOVE(LRU, system->LRU, node); 03778 } 03779 03780 node->n_pending++; 03781 } 03782 03783 /************************************************************************ 03784 Updates the data structures when an i/o operation finishes. Updates the 03785 pending i/o's field in the node appropriately. 
*/ 03786 static 03787 void 03788 fil_node_complete_io( 03789 /*=================*/ 03790 fil_node_t* node, /* in: file node */ 03791 fil_system_t* system, /* in: tablespace memory cache */ 03792 ulint type) /* in: OS_FILE_WRITE or OS_FILE_READ; marks 03793 the node as modified if 03794 type == OS_FILE_WRITE */ 03795 { 03796 ut_ad(node); 03797 ut_ad(system); 03798 #ifdef UNIV_SYNC_DEBUG 03799 ut_ad(mutex_own(&(system->mutex))); 03800 #endif /* UNIV_SYNC_DEBUG */ 03801 03802 ut_a(node->n_pending > 0); 03803 03804 node->n_pending--; 03805 03806 if (type == OS_FILE_WRITE) { 03807 system->modification_counter++; 03808 node->modification_counter = system->modification_counter; 03809 03810 if (!node->space->is_in_unflushed_spaces) { 03811 03812 node->space->is_in_unflushed_spaces = TRUE; 03813 UT_LIST_ADD_FIRST(unflushed_spaces, 03814 system->unflushed_spaces, 03815 node->space); 03816 } 03817 } 03818 03819 if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE 03820 && node->space->id != 0) { 03821 /* The node must be put back to the LRU list */ 03822 UT_LIST_ADD_FIRST(LRU, system->LRU, node); 03823 } 03824 } 03825 03826 /************************************************************************ 03827 Report information about an invalid page access. 
*/ 03828 static 03829 void 03830 fil_report_invalid_page_access( 03831 /*===========================*/ 03832 ulint block_offset, /* in: block offset */ 03833 ulint space_id, /* in: space id */ 03834 const char* space_name, /* in: space name */ 03835 ulint byte_offset, /* in: byte offset */ 03836 ulint len, /* in: I/O length */ 03837 ulint type) /* in: I/O type */ 03838 { 03839 fprintf(stderr, 03840 "InnoDB: Error: trying to access page number %lu in space %lu,\n" 03841 "InnoDB: space name %s,\n" 03842 "InnoDB: which is outside the tablespace bounds.\n" 03843 "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n" 03844 "InnoDB: If you get this error at mysqld startup, please check that\n" 03845 "InnoDB: your my.cnf matches the ibdata files that you have in the\n" 03846 "InnoDB: MySQL server.\n", 03847 (ulong) block_offset, (ulong) space_id, space_name, 03848 (ulong) byte_offset, (ulong) len, (ulong) type); 03849 } 03850 03851 /************************************************************************ 03852 Reads or writes data. This operation is asynchronous (aio). */ 03853 03854 ulint 03855 fil_io( 03856 /*===*/ 03857 /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED 03858 if we are trying to do i/o on a tablespace 03859 which does not exist */ 03860 ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE, 03861 ORed to OS_FILE_LOG, if a log i/o 03862 and ORed to OS_AIO_SIMULATED_WAKE_LATER 03863 if simulated aio and we want to post a 03864 batch of i/os; NOTE that a simulated batch 03865 may introduce hidden chances of deadlocks, 03866 because i/os are not actually handled until 03867 all have been posted: use with great 03868 caution! 
*/ 03869 ibool sync, /* in: TRUE if synchronous aio is desired */ 03870 ulint space_id, /* in: space id */ 03871 ulint block_offset, /* in: offset in number of blocks */ 03872 ulint byte_offset, /* in: remainder of offset in bytes; in 03873 aio this must be divisible by the OS block 03874 size */ 03875 ulint len, /* in: how many bytes to read or write; this 03876 must not cross a file boundary; in aio this 03877 must be a block size multiple */ 03878 void* buf, /* in/out: buffer where to store read data 03879 or from where to write; in aio this must be 03880 appropriately aligned */ 03881 void* message) /* in: message for aio handler if non-sync 03882 aio used, else ignored */ 03883 { 03884 fil_system_t* system = fil_system; 03885 ulint mode; 03886 fil_space_t* space; 03887 fil_node_t* node; 03888 ulint offset_high; 03889 ulint offset_low; 03890 ibool ret; 03891 ulint is_log; 03892 ulint wake_later; 03893 03894 is_log = type & OS_FILE_LOG; 03895 type = type & ~OS_FILE_LOG; 03896 03897 wake_later = type & OS_AIO_SIMULATED_WAKE_LATER; 03898 type = type & ~OS_AIO_SIMULATED_WAKE_LATER; 03899 03900 ut_ad(byte_offset < UNIV_PAGE_SIZE); 03901 ut_ad(buf); 03902 ut_ad(len > 0); 03903 ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE); 03904 ut_ad(fil_validate()); 03905 #ifndef UNIV_LOG_DEBUG 03906 /* ibuf bitmap pages must be read in the sync aio mode: */ 03907 ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE) 03908 || !ibuf_bitmap_page(block_offset) || sync || is_log); 03909 #ifdef UNIV_SYNC_DEBUG 03910 ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE) 03911 || ibuf_page(space_id, block_offset)); 03912 #endif 03913 #endif 03914 if (sync) { 03915 mode = OS_AIO_SYNC; 03916 } else if (type == OS_FILE_READ && !is_log 03917 && ibuf_page(space_id, block_offset)) { 03918 mode = OS_AIO_IBUF; 03919 } else if (is_log) { 03920 mode = OS_AIO_LOG; 03921 } else { 03922 mode = OS_AIO_NORMAL; 03923 } 03924 03925 if (type == OS_FILE_READ) { 03926 srv_data_read+= len; 03927 
} else if (type == OS_FILE_WRITE) { 03928 srv_data_written+= len; 03929 } 03930 03931 /* Reserve the fil_system mutex and make sure that we can open at 03932 least one file while holding it, if the file is not already open */ 03933 03934 fil_mutex_enter_and_prepare_for_io(space_id); 03935 03936 HASH_SEARCH(hash, system->spaces, space_id, space, 03937 space->id == space_id); 03938 if (!space) { 03939 mutex_exit(&(system->mutex)); 03940 03941 ut_print_timestamp(stderr); 03942 fprintf(stderr, 03943 " InnoDB: Error: trying to do i/o to a tablespace which does not exist.\n" 03944 "InnoDB: i/o type %lu, space id %lu, page no. %lu, i/o length %lu bytes\n", 03945 (ulong) type, (ulong) space_id, (ulong) block_offset, 03946 (ulong) len); 03947 03948 return(DB_TABLESPACE_DELETED); 03949 } 03950 03951 ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE)); 03952 03953 node = UT_LIST_GET_FIRST(space->chain); 03954 03955 for (;;) { 03956 if (node == NULL) { 03957 fil_report_invalid_page_access(block_offset, space_id, 03958 space->name, byte_offset, len, type); 03959 03960 ut_error; 03961 } 03962 03963 if (space->id != 0 && node->size == 0) { 03964 /* We do not know the size of a single-table tablespace 03965 before we open the file */ 03966 03967 break; 03968 } 03969 03970 if (node->size > block_offset) { 03971 /* Found! 
*/ 03972 break; 03973 } else { 03974 block_offset -= node->size; 03975 node = UT_LIST_GET_NEXT(chain, node); 03976 } 03977 } 03978 03979 /* Open file if closed */ 03980 fil_node_prepare_for_io(node, system, space); 03981 03982 /* Check that at least the start offset is within the bounds of a 03983 single-table tablespace */ 03984 if (space->purpose == FIL_TABLESPACE && space->id != 0 03985 && node->size <= block_offset) { 03986 03987 fil_report_invalid_page_access(block_offset, space_id, 03988 space->name, byte_offset, len, type); 03989 03990 ut_error; 03991 } 03992 03993 /* Now we have made the changes in the data structures of system */ 03994 mutex_exit(&(system->mutex)); 03995 03996 /* Calculate the low 32 bits and the high 32 bits of the file offset */ 03997 03998 offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT)); 03999 offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL) 04000 + byte_offset; 04001 04002 ut_a(node->size - block_offset >= 04003 (byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE); 04004 04005 /* Do aio */ 04006 04007 ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); 04008 ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0); 04009 04010 #ifdef UNIV_HOTBACKUP 04011 /* In ibbackup do normal i/o, not aio */ 04012 if (type == OS_FILE_READ) { 04013 ret = os_file_read(node->handle, buf, offset_low, offset_high, 04014 len); 04015 } else { 04016 ret = os_file_write(node->name, node->handle, buf, 04017 offset_low, offset_high, len); 04018 } 04019 #else 04020 /* Queue the aio request */ 04021 ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, 04022 offset_low, offset_high, len, node, message); 04023 #endif 04024 ut_a(ret); 04025 04026 if (mode == OS_AIO_SYNC) { 04027 /* The i/o operation is already completed when we return from 04028 os_aio: */ 04029 04030 mutex_enter(&(system->mutex)); 04031 04032 fil_node_complete_io(node, system, type); 04033 04034 mutex_exit(&(system->mutex)); 04035 04036 ut_ad(fil_validate()); 
04037 } 04038 04039 return(DB_SUCCESS); 04040 } 04041 04042 /************************************************************************ 04043 Reads data from a space to a buffer. Remember that the possible incomplete 04044 blocks at the end of file are ignored: they are not taken into account when 04045 calculating the byte offset within a space. */ 04046 04047 ulint 04048 fil_read( 04049 /*=====*/ 04050 /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED 04051 if we are trying to do i/o on a tablespace 04052 which does not exist */ 04053 ibool sync, /* in: TRUE if synchronous aio is desired */ 04054 ulint space_id, /* in: space id */ 04055 ulint block_offset, /* in: offset in number of blocks */ 04056 ulint byte_offset, /* in: remainder of offset in bytes; in aio 04057 this must be divisible by the OS block size */ 04058 ulint len, /* in: how many bytes to read; this must not 04059 cross a file boundary; in aio this must be a 04060 block size multiple */ 04061 void* buf, /* in/out: buffer where to store data read; 04062 in aio this must be appropriately aligned */ 04063 void* message) /* in: message for aio handler if non-sync 04064 aio used, else ignored */ 04065 { 04066 return(fil_io(OS_FILE_READ, sync, space_id, block_offset, 04067 byte_offset, len, buf, message)); 04068 } 04069 04070 /************************************************************************ 04071 Writes data to a space from a buffer. Remember that the possible incomplete 04072 blocks at the end of file are ignored: they are not taken into account when 04073 calculating the byte offset within a space. 
*/ 04074 04075 ulint 04076 fil_write( 04077 /*======*/ 04078 /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED 04079 if we are trying to do i/o on a tablespace 04080 which does not exist */ 04081 ibool sync, /* in: TRUE if synchronous aio is desired */ 04082 ulint space_id, /* in: space id */ 04083 ulint block_offset, /* in: offset in number of blocks */ 04084 ulint byte_offset, /* in: remainder of offset in bytes; in aio 04085 this must be divisible by the OS block size */ 04086 ulint len, /* in: how many bytes to write; this must 04087 not cross a file boundary; in aio this must 04088 be a block size multiple */ 04089 void* buf, /* in: buffer from which to write; in aio 04090 this must be appropriately aligned */ 04091 void* message) /* in: message for aio handler if non-sync 04092 aio used, else ignored */ 04093 { 04094 return(fil_io(OS_FILE_WRITE, sync, space_id, block_offset, 04095 byte_offset, len, buf, message)); 04096 } 04097 04098 /************************************************************************** 04099 Waits for an aio operation to complete. This function is used to write the 04100 handler for completed requests. The aio array of pending requests is divided 04101 into segments (see os0file.c for more info). The thread specifies which 04102 segment it wants to wait for. 
*/ 04103 04104 void 04105 fil_aio_wait( 04106 /*=========*/ 04107 ulint segment) /* in: the number of the segment in the aio 04108 array to wait for */ 04109 { 04110 fil_system_t* system = fil_system; 04111 ibool ret; 04112 fil_node_t* fil_node; 04113 void* message; 04114 ulint type; 04115 04116 ut_ad(fil_validate()); 04117 04118 if (os_aio_use_native_aio) { 04119 srv_set_io_thread_op_info(segment, "native aio handle"); 04120 #ifdef WIN_ASYNC_IO 04121 ret = os_aio_windows_handle(segment, 0, &fil_node, 04122 &message, &type); 04123 #elif defined(POSIX_ASYNC_IO) 04124 ret = os_aio_posix_handle(segment, &fil_node, &message); 04125 #else 04126 ret = 0; /* Eliminate compiler warning */ 04127 ut_error; 04128 #endif 04129 } else { 04130 srv_set_io_thread_op_info(segment, "simulated aio handle"); 04131 04132 ret = os_aio_simulated_handle(segment, &fil_node, 04133 &message, &type); 04134 } 04135 04136 ut_a(ret); 04137 04138 srv_set_io_thread_op_info(segment, "complete io for fil node"); 04139 04140 mutex_enter(&(system->mutex)); 04141 04142 fil_node_complete_io(fil_node, fil_system, type); 04143 04144 mutex_exit(&(system->mutex)); 04145 04146 ut_ad(fil_validate()); 04147 04148 /* Do the i/o handling */ 04149 /* IMPORTANT: since i/o handling for reads will read also the insert 04150 buffer in tablespace 0, you have to be very careful not to introduce 04151 deadlocks in the i/o system. We keep tablespace 0 data files always 04152 open, and use a special i/o thread to serve insert buffer requests. */ 04153 04154 if (buf_pool_is_block(message)) { 04155 srv_set_io_thread_op_info(segment, "complete io for buf page"); 04156 buf_page_io_complete(message); 04157 } else { 04158 srv_set_io_thread_op_info(segment, "complete io for log"); 04159 log_io_complete(message); 04160 } 04161 } 04162 04163 /************************************************************************** 04164 Flushes to disk possible writes cached by the OS. 
If the space does not exist 04165 or is being dropped, does not do anything. */ 04166 04167 void 04168 fil_flush( 04169 /*======*/ 04170 ulint space_id) /* in: file space id (this can be a group of 04171 log files or a tablespace of the database) */ 04172 { 04173 fil_system_t* system = fil_system; 04174 fil_space_t* space; 04175 fil_node_t* node; 04176 os_file_t file; 04177 ib_longlong old_mod_counter; 04178 04179 mutex_enter(&(system->mutex)); 04180 04181 HASH_SEARCH(hash, system->spaces, space_id, space, 04182 space->id == space_id); 04183 if (!space || space->is_being_deleted) { 04184 mutex_exit(&(system->mutex)); 04185 04186 return; 04187 } 04188 04189 space->n_pending_flushes++; /* prevent dropping of the space while 04190 we are flushing */ 04191 node = UT_LIST_GET_FIRST(space->chain); 04192 04193 while (node) { 04194 if (node->modification_counter > node->flush_counter) { 04195 ut_a(node->open); 04196 04197 /* We want to flush the changes at least up to 04198 old_mod_counter */ 04199 old_mod_counter = node->modification_counter; 04200 04201 if (space->purpose == FIL_TABLESPACE) { 04202 fil_n_pending_tablespace_flushes++; 04203 } else { 04204 fil_n_pending_log_flushes++; 04205 fil_n_log_flushes++; 04206 } 04207 #ifdef __WIN__ 04208 if (node->is_raw_disk) { 04209 04210 goto skip_flush; 04211 } 04212 #endif 04213 retry: 04214 if (node->n_pending_flushes > 0) { 04215 /* We want to avoid calling os_file_flush() on 04216 the file twice at the same time, because we do 04217 not know what bugs OS's may contain in file 04218 i/o; sleep for a while */ 04219 04220 mutex_exit(&(system->mutex)); 04221 04222 os_thread_sleep(20000); 04223 04224 mutex_enter(&(system->mutex)); 04225 04226 if (node->flush_counter >= old_mod_counter) { 04227 04228 goto skip_flush; 04229 } 04230 04231 goto retry; 04232 } 04233 04234 ut_a(node->open); 04235 file = node->handle; 04236 node->n_pending_flushes++; 04237 04238 mutex_exit(&(system->mutex)); 04239 04240 /* fprintf(stderr, "Flushing to 
file %s\n", 04241 node->name); */ 04242 04243 os_file_flush(file); 04244 04245 mutex_enter(&(system->mutex)); 04246 04247 node->n_pending_flushes--; 04248 skip_flush: 04249 if (node->flush_counter < old_mod_counter) { 04250 node->flush_counter = old_mod_counter; 04251 04252 if (space->is_in_unflushed_spaces 04253 && fil_space_is_flushed(space)) { 04254 04255 space->is_in_unflushed_spaces = FALSE; 04256 04257 UT_LIST_REMOVE(unflushed_spaces, 04258 system->unflushed_spaces, 04259 space); 04260 } 04261 } 04262 04263 if (space->purpose == FIL_TABLESPACE) { 04264 fil_n_pending_tablespace_flushes--; 04265 } else { 04266 fil_n_pending_log_flushes--; 04267 } 04268 } 04269 04270 node = UT_LIST_GET_NEXT(chain, node); 04271 } 04272 04273 space->n_pending_flushes--; 04274 04275 mutex_exit(&(system->mutex)); 04276 } 04277 04278 /************************************************************************** 04279 Flushes to disk the writes in file spaces of the given type possibly cached by 04280 the OS. */ 04281 04282 void 04283 fil_flush_file_spaces( 04284 /*==================*/ 04285 ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */ 04286 { 04287 fil_system_t* system = fil_system; 04288 fil_space_t* space; 04289 04290 mutex_enter(&(system->mutex)); 04291 04292 space = UT_LIST_GET_FIRST(system->unflushed_spaces); 04293 04294 while (space) { 04295 if (space->purpose == purpose && !space->is_being_deleted) { 04296 04297 space->n_pending_flushes++; /* prevent dropping of 04298 the space while we are 04299 flushing */ 04300 mutex_exit(&(system->mutex)); 04301 04302 fil_flush(space->id); 04303 04304 mutex_enter(&(system->mutex)); 04305 04306 space->n_pending_flushes--; 04307 } 04308 space = UT_LIST_GET_NEXT(unflushed_spaces, space); 04309 } 04310 04311 mutex_exit(&(system->mutex)); 04312 } 04313 04314 /********************************************************************** 04315 Checks the consistency of the tablespace cache. 
*/ 04316 04317 ibool 04318 fil_validate(void) 04319 /*==============*/ 04320 /* out: TRUE if ok */ 04321 { 04322 fil_system_t* system = fil_system; 04323 fil_space_t* space; 04324 fil_node_t* fil_node; 04325 ulint n_open = 0; 04326 ulint i; 04327 04328 mutex_enter(&(system->mutex)); 04329 04330 /* Look for spaces in the hash table */ 04331 04332 for (i = 0; i < hash_get_n_cells(system->spaces); i++) { 04333 04334 space = HASH_GET_FIRST(system->spaces, i); 04335 04336 while (space != NULL) { 04337 UT_LIST_VALIDATE(chain, fil_node_t, space->chain); 04338 04339 fil_node = UT_LIST_GET_FIRST(space->chain); 04340 04341 while (fil_node != NULL) { 04342 if (fil_node->n_pending > 0) { 04343 ut_a(fil_node->open); 04344 } 04345 04346 if (fil_node->open) { 04347 n_open++; 04348 } 04349 fil_node = UT_LIST_GET_NEXT(chain, fil_node); 04350 } 04351 space = HASH_GET_NEXT(hash, space); 04352 } 04353 } 04354 04355 ut_a(system->n_open == n_open); 04356 04357 UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU); 04358 04359 fil_node = UT_LIST_GET_FIRST(system->LRU); 04360 04361 while (fil_node != NULL) { 04362 ut_a(fil_node->n_pending == 0); 04363 ut_a(fil_node->open); 04364 ut_a(fil_node->space->purpose == FIL_TABLESPACE); 04365 ut_a(fil_node->space->id != 0); 04366 04367 fil_node = UT_LIST_GET_NEXT(LRU, fil_node); 04368 } 04369 04370 mutex_exit(&(system->mutex)); 04371 04372 return(TRUE); 04373 } 04374 04375 /************************************************************************ 04376 Returns TRUE if file address is undefined. 
*/ 04377 ibool 04378 fil_addr_is_null( 04379 /*=============*/ 04380 /* out: TRUE if undefined */ 04381 fil_addr_t addr) /* in: address */ 04382 { 04383 if (addr.page == FIL_NULL) { 04384 04385 return(TRUE); 04386 } 04387 04388 return(FALSE); 04389 } 04390 04391 /************************************************************************ 04392 Accessor functions for a file page */ 04393 04394 ulint 04395 fil_page_get_prev(byte* page) 04396 { 04397 return(mach_read_from_4(page + FIL_PAGE_PREV)); 04398 } 04399 04400 ulint 04401 fil_page_get_next(byte* page) 04402 { 04403 return(mach_read_from_4(page + FIL_PAGE_NEXT)); 04404 } 04405 04406 /************************************************************************* 04407 Sets the file page type. */ 04408 04409 void 04410 fil_page_set_type( 04411 /*==============*/ 04412 byte* page, /* in: file page */ 04413 ulint type) /* in: type */ 04414 { 04415 ut_ad(page); 04416 04417 mach_write_to_2(page + FIL_PAGE_TYPE, type); 04418 } 04419 04420 /************************************************************************* 04421 Gets the file page type. */ 04422 04423 ulint 04424 fil_page_get_type( 04425 /*==============*/ 04426 /* out: type; NOTE that if the type has not been 04427 written to page, the return value not defined */ 04428 byte* page) /* in: file page */ 04429 { 04430 ut_ad(page); 04431 04432 return(mach_read_from_2(page + FIL_PAGE_TYPE)); 04433 }
1.4.7

