00001 /* Copyright (C) 2000 MySQL AB 00002 00003 This program is free software; you can redistribute it and/or modify 00004 it under the terms of the GNU General Public License as published by 00005 the Free Software Foundation; either version 2 of the License, or 00006 (at your option) any later version. 00007 00008 This program is distributed in the hope that it will be useful, 00009 but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00011 GNU General Public License for more details. 00012 00013 You should have received a copy of the GNU General Public License 00014 along with this program; if not, write to the Free Software 00015 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ 00016 00017 #include "mysys_priv.h" 00018 #include "mysys_err.h" 00019 #include <m_ctype.h> 00020 #include <m_string.h> 00021 #include <my_dir.h> 00022 #include <my_xml.h> 00023 00024 00025 /* 00026 The code below implements this functionality: 00027 00028 - Initializing charset related structures 00029 - Loading dynamic charsets 00030 - Searching for a proper CHARSET_INFO 00031 using charset name, collation name or collation ID 00032 - Setting server default character set 00033 */ 00034 00035 my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2) 00036 { 00037 return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname)); 00038 } 00039 00040 00041 static uint 00042 get_collation_number_internal(const char *name) 00043 { 00044 CHARSET_INFO **cs; 00045 for (cs= all_charsets; 00046 cs < all_charsets+array_elements(all_charsets)-1 ; 00047 cs++) 00048 { 00049 if ( cs[0] && cs[0]->name && 00050 !my_strcasecmp(&my_charset_latin1, cs[0]->name, name)) 00051 return cs[0]->number; 00052 } 00053 return 0; 00054 } 00055 00056 00057 static my_bool init_state_maps(CHARSET_INFO *cs) 00058 { 00059 uint i; 00060 uchar *state_map; 00061 uchar *ident_map; 00062 00063 if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME)))) 00064 return 1; 00065 00066 if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME)))) 00067 return 1; 00068 00069 state_map= cs->state_map; 00070 ident_map= cs->ident_map; 00071 00072 /* Fill state_map with states to get a faster parser */ 00073 for (i=0; i < 256 ; i++) 00074 { 00075 if (my_isalpha(cs,i)) 00076 state_map[i]=(uchar) MY_LEX_IDENT; 00077 else if (my_isdigit(cs,i)) 00078 state_map[i]=(uchar) MY_LEX_NUMBER_IDENT; 00079 #if defined(USE_MB) && defined(USE_MB_IDENT) 00080 else if (my_mbcharlen(cs, i)>1) 00081 state_map[i]=(uchar) MY_LEX_IDENT; 00082 #endif 00083 else if (my_isspace(cs,i)) 00084 state_map[i]=(uchar) MY_LEX_SKIP; 00085 else 00086 state_map[i]=(uchar) MY_LEX_CHAR; 00087 } 00088 state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT; 00089 state_map[(uchar)'\'']=(uchar) MY_LEX_STRING; 00090 state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT; 00091 state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP; 00092 state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP; 00093 state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL; 00094 state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT; 00095 state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON; 00096 state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR; 00097 state_map[0]=(uchar) MY_LEX_EOL; 00098 state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE; 00099 state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT; 00100 state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT; 00101 state_map[(uchar)'@']= (uchar) MY_LEX_USER_END; 00102 state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER; 00103 state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER; 00104 00105 /* 00106 Create a second map to make it faster to find identifiers 00107 */ 00108 for (i=0; i < 256 ; i++) 00109 { 00110 ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT || 00111 state_map[i] == MY_LEX_NUMBER_IDENT); 00112 } 00113 00114 /* Special handling of hex and binary strings */ 00115 state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX; 00116 state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN; 00117 state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR; 00118 return 0; 00119 } 00120 00121 00122 static void simple_cs_init_functions(CHARSET_INFO *cs) 00123 { 00124 if (cs->state & MY_CS_BINSORT) 00125 cs->coll= &my_collation_8bit_bin_handler; 00126 else 00127 cs->coll= &my_collation_8bit_simple_ci_handler; 00128 00129 cs->cset= &my_charset_8bit_handler; 00130 } 00131 00132 00133 00134 static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) 00135 { 00136 to->number= from->number ? from->number : to->number; 00137 00138 if (from->csname) 00139 if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME)))) 00140 goto err; 00141 00142 if (from->name) 00143 if (!(to->name= my_once_strdup(from->name,MYF(MY_WME)))) 00144 goto err; 00145 00146 if (from->comment) 00147 if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME)))) 00148 goto err; 00149 00150 if (from->ctype) 00151 { 00152 if (!(to->ctype= (uchar*) my_once_memdup((char*) from->ctype, 00153 MY_CS_CTYPE_TABLE_SIZE, 00154 MYF(MY_WME)))) 00155 goto err; 00156 if (init_state_maps(to)) 00157 goto err; 00158 } 00159 if (from->to_lower) 00160 if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower, 00161 MY_CS_TO_LOWER_TABLE_SIZE, 00162 MYF(MY_WME)))) 00163 goto err; 00164 00165 if (from->to_upper) 00166 if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper, 00167 MY_CS_TO_UPPER_TABLE_SIZE, 00168 MYF(MY_WME)))) 00169 goto err; 00170 if (from->sort_order) 00171 { 00172 if (!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order, 00173 MY_CS_SORT_ORDER_TABLE_SIZE, 00174 MYF(MY_WME)))) 00175 goto err; 00176 00177 } 00178 if (from->tab_to_uni) 00179 { 00180 uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16); 00181 if (!(to->tab_to_uni= (uint16*) my_once_memdup((char*)from->tab_to_uni, 00182 sz, MYF(MY_WME)))) 00183 goto err; 00184 } 00185 if (from->tailoring) 00186 if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME)))) 00187 goto err; 00188 00189 return 0; 00190 00191 err: 00192 return 1; 00193 } 00194 00195 00196 00197 static my_bool simple_cs_is_full(CHARSET_INFO *cs) 00198 { 00199 return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper && 00200 cs->to_lower) && 00201 (cs->number && cs->name && 00202 (cs->sort_order || (cs->state & MY_CS_BINSORT) ))); 00203 } 00204 00205 00206 static int add_collation(CHARSET_INFO *cs) 00207 { 00208 if (cs->name && (cs->number || 00209 (cs->number=get_collation_number_internal(cs->name)))) 00210 { 00211 if (!all_charsets[cs->number]) 00212 { 00213 if (!(all_charsets[cs->number]= 00214 (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0)))) 00215 return MY_XML_ERROR; 00216 bzero((void*)all_charsets[cs->number],sizeof(CHARSET_INFO)); 00217 } 00218 00219 if (cs->primary_number == cs->number) 00220 cs->state |= MY_CS_PRIMARY; 00221 00222 if (cs->binary_number == cs->number) 00223 cs->state |= MY_CS_BINSORT; 00224 00225 all_charsets[cs->number]->state|= cs->state; 00226 00227 if (!(all_charsets[cs->number]->state & MY_CS_COMPILED)) 00228 { 00229 CHARSET_INFO *new= all_charsets[cs->number]; 00230 if (cs_copy_data(all_charsets[cs->number],cs)) 00231 return MY_XML_ERROR; 00232 00233 if (!strcmp(cs->csname,"ucs2") ) 00234 { 00235 #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS) 00236 new->cset= my_charset_ucs2_general_uca.cset; 00237 new->coll= my_charset_ucs2_general_uca.coll; 00238 new->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply; 00239 new->min_sort_char= my_charset_ucs2_general_uca.min_sort_char; 00240 new->max_sort_char= my_charset_ucs2_general_uca.max_sort_char; 00241 new->mbminlen= 2; 00242 new->mbmaxlen= 2; 00243 new->state |= MY_CS_AVAILABLE | MY_CS_LOADED; 00244 #endif 00245 } 00246 else 00247 { 00248 uchar *sort_order= all_charsets[cs->number]->sort_order; 00249 simple_cs_init_functions(all_charsets[cs->number]); 00250 new->mbminlen= 1; 00251 new->mbmaxlen= 1; 00252 if (simple_cs_is_full(all_charsets[cs->number])) 00253 { 00254 all_charsets[cs->number]->state |= MY_CS_LOADED; 00255 } 00256 all_charsets[cs->number]->state|= MY_CS_AVAILABLE; 00257 00258 /* 00259 Check if case sensitive sort order: A < a < B. 00260 We need MY_CS_FLAG for regex library, and for 00261 case sensitivity flag for 5.0 client protocol, 00262 to support isCaseSensitive() method in JDBC driver 00263 */ 00264 if (sort_order && sort_order['A'] < sort_order['a'] && 00265 sort_order['a'] < sort_order['B']) 00266 all_charsets[cs->number]->state|= MY_CS_CSSORT; 00267 } 00268 } 00269 else 00270 { 00271 /* 00272 We need the below to make get_charset_name() 00273 and get_charset_number() working even if a 00274 character set has not been really incompiled. 00275 The above functions are used for example 00276 in error message compiler extra/comp_err.c. 00277 If a character set was compiled, this information 00278 will get lost and overwritten in add_compiled_collation(). 00279 */ 00280 CHARSET_INFO *dst= all_charsets[cs->number]; 00281 dst->number= cs->number; 00282 if (cs->comment) 00283 if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME)))) 00284 return MY_XML_ERROR; 00285 if (cs->csname) 00286 if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME)))) 00287 return MY_XML_ERROR; 00288 if (cs->name) 00289 if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME)))) 00290 return MY_XML_ERROR; 00291 } 00292 cs->number= 0; 00293 cs->primary_number= 0; 00294 cs->binary_number= 0; 00295 cs->name= NULL; 00296 cs->state= 0; 00297 cs->sort_order= NULL; 00298 cs->state= 0; 00299 } 00300 return MY_XML_OK; 00301 } 00302 00303 00304 #define MY_MAX_ALLOWED_BUF 1024*1024 00305 #define MY_CHARSET_INDEX "Index.xml" 00306 00307 const char *charsets_dir= NULL; 00308 static int charset_initialized=0; 00309 00310 00311 static my_bool my_read_charset_file(const char *filename, myf myflags) 00312 { 00313 char *buf; 00314 int fd; 00315 uint len; 00316 MY_STAT stat_info; 00317 00318 if (!my_stat(filename, &stat_info, MYF(myflags)) || 00319 ((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) || 00320 !(buf= (char *)my_malloc(len,myflags))) 00321 return TRUE; 00322 00323 if ((fd=my_open(filename,O_RDONLY,myflags)) < 0) 00324 { 00325 my_free(buf,myflags); 00326 return TRUE; 00327 } 00328 len=read(fd,buf,len); 00329 my_close(fd,myflags); 00330 00331 if (my_parse_charset_xml(buf,len,add_collation)) 00332 { 00333 #ifdef NOT_YET 00334 printf("ERROR at line %d pos %d '%s'\n", 00335 my_xml_error_lineno(&p)+1, 00336 my_xml_error_pos(&p), 00337 my_xml_error_string(&p)); 00338 #endif 00339 } 00340 00341 my_free(buf, myflags); 00342 return FALSE; 00343 } 00344 00345 00346 char *get_charsets_dir(char *buf) 00347 { 00348 const char *sharedir= SHAREDIR; 00349 char *res; 00350 DBUG_ENTER("get_charsets_dir"); 00351 00352 if (charsets_dir != NULL) 00353 strmake(buf, charsets_dir, FN_REFLEN-1); 00354 else 00355 { 00356 if (test_if_hard_path(sharedir) || 00357 is_prefix(sharedir, DEFAULT_CHARSET_HOME)) 00358 strxmov(buf, sharedir, "/", CHARSET_DIR, NullS); 00359 else 00360 strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR, 00361 NullS); 00362 } 00363 res= convert_dirname(buf,buf,NullS); 00364 DBUG_PRINT("info",("charsets dir: '%s'", buf)); 00365 DBUG_RETURN(res); 00366 } 00367 00368 CHARSET_INFO *all_charsets[256]; 00369 CHARSET_INFO *default_charset_info = &my_charset_latin1; 00370 00371 void add_compiled_collation(CHARSET_INFO *cs) 00372 { 00373 all_charsets[cs->number]= cs; 00374 cs->state|= MY_CS_AVAILABLE; 00375 } 00376 00377 static void *cs_alloc(uint size) 00378 { 00379 return my_once_alloc(size, MYF(MY_WME)); 00380 } 00381 00382 00383 #ifdef __NETWARE__ 00384 my_bool STDCALL init_available_charsets(myf myflags) 00385 #else 00386 static my_bool init_available_charsets(myf myflags) 00387 #endif 00388 { 00389 char fname[FN_REFLEN]; 00390 my_bool error=FALSE; 00391 /* 00392 We have to use charset_initialized to not lock on THR_LOCK_charset 00393 inside get_internal_charset... 00394 */ 00395 if (!charset_initialized) 00396 { 00397 CHARSET_INFO **cs; 00398 /* 00399 To make things thread safe we are not allowing other threads to interfere 00400 while we may changing the cs_info_table 00401 */ 00402 pthread_mutex_lock(&THR_LOCK_charset); 00403 if (!charset_initialized) 00404 { 00405 bzero(&all_charsets,sizeof(all_charsets)); 00406 init_compiled_charsets(myflags); 00407 00408 /* Copy compiled charsets */ 00409 for (cs=all_charsets; 00410 cs < all_charsets+array_elements(all_charsets)-1 ; 00411 cs++) 00412 { 00413 if (*cs) 00414 { 00415 if (cs[0]->ctype) 00416 if (init_state_maps(*cs)) 00417 *cs= NULL; 00418 } 00419 } 00420 00421 strmov(get_charsets_dir(fname), MY_CHARSET_INDEX); 00422 error= my_read_charset_file(fname,myflags); 00423 charset_initialized=1; 00424 } 00425 pthread_mutex_unlock(&THR_LOCK_charset); 00426 } 00427 return error; 00428 } 00429 00430 00431 void free_charsets(void) 00432 { 00433 charset_initialized=0; 00434 } 00435 00436 00437 uint get_collation_number(const char *name) 00438 { 00439 init_available_charsets(MYF(0)); 00440 return get_collation_number_internal(name); 00441 } 00442 00443 00444 uint get_charset_number(const char *charset_name, uint cs_flags) 00445 { 00446 CHARSET_INFO **cs; 00447 init_available_charsets(MYF(0)); 00448 00449 for (cs= all_charsets; 00450 cs < all_charsets+array_elements(all_charsets)-1 ; 00451 cs++) 00452 { 00453 if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && 00454 !my_strcasecmp(&my_charset_latin1, cs[0]->csname, charset_name)) 00455 return cs[0]->number; 00456 } 00457 return 0; 00458 } 00459 00460 00461 const char *get_charset_name(uint charset_number) 00462 { 00463 CHARSET_INFO *cs; 00464 init_available_charsets(MYF(0)); 00465 00466 cs=all_charsets[charset_number]; 00467 if (cs && (cs->number == charset_number) && cs->name ) 00468 return (char*) cs->name; 00469 00470 return (char*) "?"; /* this mimics find_type() */ 00471 } 00472 00473 00474 static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags) 00475 { 00476 char buf[FN_REFLEN]; 00477 CHARSET_INFO *cs; 00478 /* 00479 To make things thread safe we are not allowing other threads to interfere 00480 while we may changing the cs_info_table 00481 */ 00482 pthread_mutex_lock(&THR_LOCK_charset); 00483 if ((cs= all_charsets[cs_number])) 00484 { 00485 if (!(cs->state & MY_CS_COMPILED) && !(cs->state & MY_CS_LOADED)) 00486 { 00487 strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS); 00488 my_read_charset_file(buf,flags); 00489 } 00490 cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL; 00491 } 00492 if (cs && !(cs->state & MY_CS_READY)) 00493 { 00494 if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) || 00495 (cs->coll->init && cs->coll->init(cs, cs_alloc))) 00496 cs= NULL; 00497 else 00498 cs->state|= MY_CS_READY; 00499 } 00500 pthread_mutex_unlock(&THR_LOCK_charset); 00501 return cs; 00502 } 00503 00504 00505 CHARSET_INFO *get_charset(uint cs_number, myf flags) 00506 { 00507 CHARSET_INFO *cs; 00508 if (cs_number == default_charset_info->number) 00509 return default_charset_info; 00510 00511 (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ 00512 00513 if (!cs_number || cs_number >= array_elements(all_charsets)-1) 00514 return NULL; 00515 00516 cs=get_internal_charset(cs_number, flags); 00517 00518 if (!cs && (flags & MY_WME)) 00519 { 00520 char index_file[FN_REFLEN], cs_string[23]; 00521 strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); 00522 cs_string[0]='#'; 00523 int10_to_str(cs_number, cs_string+1, 10); 00524 my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); 00525 } 00526 return cs; 00527 } 00528 00529 CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) 00530 { 00531 uint cs_number; 00532 CHARSET_INFO *cs; 00533 (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ 00534 00535 cs_number=get_collation_number(cs_name); 00536 cs= cs_number ? get_internal_charset(cs_number,flags) : NULL; 00537 00538 if (!cs && (flags & MY_WME)) 00539 { 00540 char index_file[FN_REFLEN]; 00541 strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); 00542 my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file); 00543 } 00544 00545 return cs; 00546 } 00547 00548 00549 CHARSET_INFO *get_charset_by_csname(const char *cs_name, 00550 uint cs_flags, 00551 myf flags) 00552 { 00553 uint cs_number; 00554 CHARSET_INFO *cs; 00555 DBUG_ENTER("get_charset_by_csname"); 00556 DBUG_PRINT("enter",("name: '%s'", cs_name)); 00557 00558 (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ 00559 00560 cs_number= get_charset_number(cs_name, cs_flags); 00561 cs= cs_number ? get_internal_charset(cs_number, flags) : NULL; 00562 00563 if (!cs && (flags & MY_WME)) 00564 { 00565 char index_file[FN_REFLEN]; 00566 strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); 00567 my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); 00568 } 00569 00570 DBUG_RETURN(cs); 00571 } 00572 00573 00574 /* 00575 Escape string with backslashes (\) 00576 00577 SYNOPSIS 00578 escape_string_for_mysql() 00579 charset_info Charset of the strings 00580 to Buffer for escaped string 00581 to_length Length of destination buffer, or 0 00582 from The string to escape 00583 length The length of the string to escape 00584 00585 DESCRIPTION 00586 This escapes the contents of a string by adding backslashes before special 00587 characters, and turning others into specific escape sequences, such as 00588 turning newlines into \n and null bytes into \0. 00589 00590 NOTE 00591 To maintain compatibility with the old C API, to_length may be 0 to mean 00592 "big enough" 00593 00594 RETURN VALUES 00595 ~0 The escaped string did not fit in the to buffer 00596 >=0 The length of the escaped string 00597 */ 00598 00599 ulong escape_string_for_mysql(CHARSET_INFO *charset_info, 00600 char *to, ulong to_length, 00601 const char *from, ulong length) 00602 { 00603 const char *to_start= to; 00604 const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); 00605 my_bool overflow= FALSE; 00606 #ifdef USE_MB 00607 my_bool use_mb_flag= use_mb(charset_info); 00608 #endif 00609 for (end= from + length; from < end; from++) 00610 { 00611 char escape= 0; 00612 #ifdef USE_MB 00613 int tmp_length; 00614 if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) 00615 { 00616 if (to + tmp_length > to_end) 00617 { 00618 overflow= TRUE; 00619 break; 00620 } 00621 while (tmp_length--) 00622 *to++= *from++; 00623 from--; 00624 continue; 00625 } 00626 /* 00627 If the next character appears to begin a multi-byte character, we 00628 escape that first byte of that apparent multi-byte character. (The 00629 character just looks like a multi-byte character -- if it were actually 00630 a multi-byte character, it would have been passed through in the test 00631 above.) 00632 00633 Without this check, we can create a problem by converting an invalid 00634 multi-byte character into a valid one. For example, 0xbf27 is not 00635 a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \) 00636 */ 00637 if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1) 00638 escape= *from; 00639 else 00640 #endif 00641 switch (*from) { 00642 case 0: /* Must be escaped for 'mysql' */ 00643 escape= '0'; 00644 break; 00645 case '\n': /* Must be escaped for logs */ 00646 escape= 'n'; 00647 break; 00648 case '\r': 00649 escape= 'r'; 00650 break; 00651 case '\\': 00652 escape= '\\'; 00653 break; 00654 case '\'': 00655 escape= '\''; 00656 break; 00657 case '"': /* Better safe than sorry */ 00658 escape= '"'; 00659 break; 00660 case '\032': /* This gives problems on Win32 */ 00661 escape= 'Z'; 00662 break; 00663 } 00664 if (escape) 00665 { 00666 if (to + 2 > to_end) 00667 { 00668 overflow= TRUE; 00669 break; 00670 } 00671 *to++= '\\'; 00672 *to++= escape; 00673 } 00674 else 00675 { 00676 if (to + 1 > to_end) 00677 { 00678 overflow= TRUE; 00679 break; 00680 } 00681 *to++= *from; 00682 } 00683 } 00684 *to= 0; 00685 return overflow ? (ulong)~0 : (ulong) (to - to_start); 00686 } 00687 00688 00689 #ifdef BACKSLASH_MBTAIL 00690 static CHARSET_INFO *fs_cset_cache= NULL; 00691 00692 CHARSET_INFO *fs_character_set() 00693 { 00694 if (!fs_cset_cache) 00695 { 00696 char buf[10]= "cp"; 00697 GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE, 00698 buf+2, sizeof(buf)-3); 00699 /* 00700 We cannot call get_charset_by_name here 00701 because fs_character_set() is executed before 00702 LOCK_THD_charset mutex initialization, which 00703 is used inside get_charset_by_name. 00704 As we're now interested in cp932 only, 00705 let's just detect it using strcmp(). 00706 */ 00707 fs_cset_cache= !strcmp(buf, "cp932") ? 00708 &my_charset_cp932_japanese_ci : &my_charset_bin; 00709 } 00710 return fs_cset_cache; 00711 } 00712 #endif 00713 00714 /* 00715 Escape apostrophes by doubling them up 00716 00717 SYNOPSIS 00718 escape_quotes_for_mysql() 00719 charset_info Charset of the strings 00720 to Buffer for escaped string 00721 to_length Length of destination buffer, or 0 00722 from The string to escape 00723 length The length of the string to escape 00724 00725 DESCRIPTION 00726 This escapes the contents of a string by doubling up any apostrophes that 00727 it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in 00728 effect on the server. 00729 00730 NOTE 00731 To be consistent with escape_string_for_mysql(), to_length may be 0 to 00732 mean "big enough" 00733 00734 RETURN VALUES 00735 ~0 The escaped string did not fit in the to buffer 00736 >=0 The length of the escaped string 00737 */ 00738 00739 ulong escape_quotes_for_mysql(CHARSET_INFO *charset_info, 00740 char *to, ulong to_length, 00741 const char *from, ulong length) 00742 { 00743 const char *to_start= to; 00744 const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); 00745 my_bool overflow= FALSE; 00746 #ifdef USE_MB 00747 my_bool use_mb_flag= use_mb(charset_info); 00748 #endif 00749 for (end= from + length; from < end; from++) 00750 { 00751 #ifdef USE_MB 00752 int tmp_length; 00753 if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) 00754 { 00755 if (to + tmp_length > to_end) 00756 { 00757 overflow= TRUE; 00758 break; 00759 } 00760 while (tmp_length--) 00761 *to++= *from++; 00762 from--; 00763 continue; 00764 } 00765 /* 00766 We don't have the same issue here with a non-multi-byte character being 00767 turned into a multi-byte character by the addition of an escaping 00768 character, because we are only escaping the ' character with itself. 00769 */ 00770 #endif 00771 if (*from == '\'') 00772 { 00773 if (to + 2 > to_end) 00774 { 00775 overflow= TRUE; 00776 break; 00777 } 00778 *to++= '\''; 00779 *to++= '\''; 00780 } 00781 else 00782 { 00783 if (to + 1 > to_end) 00784 { 00785 overflow= TRUE; 00786 break; 00787 } 00788 *to++= *from; 00789 } 00790 } 00791 *to= 0; 00792 return overflow ? (ulong)~0 : (ulong) (to - to_start); 00793 }
1.4.7

