00001 /* Copyright (C) 2003 MySQL AB 00002 00003 This program is free software; you can redistribute it and/or modify 00004 it under the terms of the GNU General Public License as published by 00005 the Free Software Foundation; either version 2 of the License, or 00006 (at your option) any later version. 00007 00008 This program is distributed in the hope that it will be useful, 00009 but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00011 GNU General Public License for more details. 00012 00013 You should have received a copy of the GNU General Public License 00014 along with this program; if not, write to the Free Software 00015 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ 00016 00017 /* Some useful string utility functions used by the MySQL server */ 00018 00019 #include "mysql_priv.h" 00020 00021 /* 00022 Return bitmap for strings used in a set 00023 00024 SYNOPSIS 00025 find_set() 00026 lib Strings in set 00027 str Strings of set-strings separated by ',' 00028 err_pos If error, set to point to start of wrong set string 00029 err_len If error, set to the length of wrong set string 00030 set_warning Set to 1 if some string in set couldn't be used 00031 00032 NOTE 00033 We delete all end space from str before comparison 00034 00035 RETURN 00036 bitmap of all sets found in x. 00037 set_warning is set to 1 if there was any sets that couldn't be set 00038 */ 00039 00040 static const char field_separator=','; 00041 00042 ulonglong find_set(TYPELIB *lib, const char *str, uint length, CHARSET_INFO *cs, 00043 char **err_pos, uint *err_len, bool *set_warning) 00044 { 00045 CHARSET_INFO *strip= cs ? cs : &my_charset_latin1; 00046 const char *end= str + strip->cset->lengthsp(strip, str, length); 00047 ulonglong found= 0; 00048 *err_pos= 0; // No error yet 00049 if (str != end) 00050 { 00051 const char *start= str; 00052 for (;;) 00053 { 00054 const char *pos= start; 00055 uint var_len; 00056 int mblen= 1; 00057 00058 if (cs && cs->mbminlen > 1) 00059 { 00060 for ( ; pos < end; pos+= mblen) 00061 { 00062 my_wc_t wc; 00063 if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos, 00064 (const uchar *) end)) < 1) 00065 mblen= 1; // Not to hang on a wrong multibyte sequence 00066 if (wc == (my_wc_t) field_separator) 00067 break; 00068 } 00069 } 00070 else 00071 for (; pos != end && *pos != field_separator; pos++) ; 00072 var_len= (uint) (pos - start); 00073 uint find= cs ? find_type2(lib, start, var_len, cs) : 00074 find_type(lib, start, var_len, (bool) 0); 00075 if (!find) 00076 { 00077 *err_pos= (char*) start; 00078 *err_len= var_len; 00079 *set_warning= 1; 00080 } 00081 else 00082 found|= ((longlong) 1 << (find - 1)); 00083 if (pos >= end) 00084 break; 00085 start= pos + mblen; 00086 } 00087 } 00088 return found; 00089 } 00090 00091 00092 /* 00093 Function to find a string in a TYPELIB 00094 (Same format as mysys/typelib.c) 00095 00096 SYNOPSIS 00097 find_type() 00098 lib TYPELIB (struct of pointer to values + count) 00099 find String to find 00100 length Length of string to find 00101 part_match Allow part matching of value 00102 00103 RETURN 00104 0 error 00105 > 0 position in TYPELIB->type_names +1 00106 */ 00107 00108 uint find_type(TYPELIB *lib, const char *find, uint length, bool part_match) 00109 { 00110 uint found_count=0, found_pos=0; 00111 const char *end= find+length; 00112 const char *i; 00113 const char *j; 00114 for (uint pos=0 ; (j=lib->type_names[pos++]) ; ) 00115 { 00116 for (i=find ; i != end && 00117 my_toupper(system_charset_info,*i) == 00118 my_toupper(system_charset_info,*j) ; i++, j++) ; 00119 if (i == end) 00120 { 00121 if (! *j) 00122 return(pos); 00123 found_count++; 00124 found_pos= pos; 00125 } 00126 } 00127 return(found_count == 1 && part_match ? found_pos : 0); 00128 } 00129 00130 00131 /* 00132 Find a string in a list of strings according to collation 00133 00134 SYNOPSIS 00135 find_type2() 00136 lib TYPELIB (struct of pointer to values + count) 00137 x String to find 00138 length String length 00139 cs Character set + collation to use for comparison 00140 00141 NOTES 00142 00143 RETURN 00144 0 No matching value 00145 >0 Offset+1 in typelib for matched string 00146 */ 00147 00148 uint find_type2(TYPELIB *typelib, const char *x, uint length, CHARSET_INFO *cs) 00149 { 00150 int pos; 00151 const char *j; 00152 DBUG_ENTER("find_type2"); 00153 DBUG_PRINT("enter",("x: '%.*s' lib: 0x%lx", length, x, typelib)); 00154 00155 if (!typelib->count) 00156 { 00157 DBUG_PRINT("exit",("no count")); 00158 DBUG_RETURN(0); 00159 } 00160 00161 for (pos=0 ; (j=typelib->type_names[pos]) ; pos++) 00162 { 00163 if (!my_strnncoll(cs, (const uchar*) x, length, 00164 (const uchar*) j, typelib->type_lengths[pos])) 00165 DBUG_RETURN(pos+1); 00166 } 00167 DBUG_PRINT("exit",("Couldn't find type")); 00168 DBUG_RETURN(0); 00169 } /* find_type */ 00170 00171 00172 /* 00173 Un-hex all elements in a typelib 00174 00175 SYNOPSIS 00176 unhex_type2() 00177 interval TYPELIB (struct of pointer to values + lengths + count) 00178 00179 NOTES 00180 00181 RETURN 00182 N/A 00183 */ 00184 00185 void unhex_type2(TYPELIB *interval) 00186 { 00187 for (uint pos= 0; pos < interval->count; pos++) 00188 { 00189 char *from, *to; 00190 for (from= to= (char*) interval->type_names[pos]; *from; ) 00191 { 00192 /* 00193 Note, hexchar_to_int(*from++) doesn't work 00194 one some compilers, e.g. IRIX. Looks like a compiler 00195 bug in inline functions in combination with arguments 00196 that have a side effect. So, let's use from[0] and from[1] 00197 and increment 'from' by two later. 00198 */ 00199 00200 *to++= (char) (hexchar_to_int(from[0]) << 4) + 00201 hexchar_to_int(from[1]); 00202 from+= 2; 00203 } 00204 interval->type_lengths[pos] /= 2; 00205 } 00206 } 00207 00208 00209 /* 00210 Check if the first word in a string is one of the ones in TYPELIB 00211 00212 SYNOPSIS 00213 check_word() 00214 lib TYPELIB 00215 val String to check 00216 end End of input 00217 end_of_word Store value of last used byte here if we found word 00218 00219 RETURN 00220 0 No matching value 00221 > 1 lib->type_names[#-1] matched 00222 end_of_word will point to separator character/end in 'val' 00223 */ 00224 00225 uint check_word(TYPELIB *lib, const char *val, const char *end, 00226 const char **end_of_word) 00227 { 00228 int res; 00229 const char *ptr; 00230 00231 /* Fiend end of word */ 00232 for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++) 00233 ; 00234 if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0) 00235 *end_of_word= ptr; 00236 return res; 00237 } 00238 00239 00240 /* 00241 Converts a string between character sets 00242 00243 SYNOPSIS 00244 strconvert() 00245 from_cs source character set 00246 from source, a null terminated string 00247 to destination buffer 00248 to_length destination buffer length 00249 00250 NOTES 00251 'to' is always terminated with a '\0' character. 00252 If there is no enough space to convert whole string, 00253 only prefix is converted, and terminated with '\0'. 00254 00255 RETURN VALUES 00256 result string length 00257 */ 00258 00259 00260 uint strconvert(CHARSET_INFO *from_cs, const char *from, 00261 CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors) 00262 { 00263 int cnvres; 00264 my_wc_t wc; 00265 char *to_start= to; 00266 uchar *to_end= (uchar*) to + to_length - 1; 00267 int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *, 00268 const uchar *)= from_cs->cset->mb_wc; 00269 int (*wc_mb)(struct charset_info_st *, my_wc_t, uchar *s, uchar *e)= 00270 to_cs->cset->wc_mb; 00271 uint error_count= 0; 00272 00273 while (1) 00274 { 00275 /* 00276 Using 'from + 10' is safe: 00277 - it is enough to scan a single character in any character set. 00278 - if remaining string is shorter than 10, then mb_wc will return 00279 with error because of unexpected '\0' character. 00280 */ 00281 if ((cnvres= (*mb_wc)(from_cs, &wc, 00282 (uchar*) from, (uchar*) from + 10)) > 0) 00283 { 00284 if (!wc) 00285 break; 00286 from+= cnvres; 00287 } 00288 else if (cnvres == MY_CS_ILSEQ) 00289 { 00290 error_count++; 00291 from++; 00292 wc= '?'; 00293 } 00294 else 00295 break; // Impossible char. 00296 00297 outp: 00298 00299 if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) 00300 to+= cnvres; 00301 else if (cnvres == MY_CS_ILUNI && wc != '?') 00302 { 00303 error_count++; 00304 wc= '?'; 00305 goto outp; 00306 } 00307 else 00308 break; 00309 } 00310 *to= '\0'; 00311 *errors= error_count; 00312 return (uint32) (to - to_start); 00313 00314 }
1.4.7

