00001 /* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB 00002 00003 This program is free software; you can redistribute it and/or modify 00004 it under the terms of the GNU General Public License as published by 00005 the Free Software Foundation; either version 2 of the License, or 00006 (at your option) any later version. 00007 00008 This program is distributed in the hope that it will be useful, 00009 but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00011 GNU General Public License for more details. 00012 00013 You should have received a copy of the GNU General Public License 00014 along with this program; if not, write to the Free Software 00015 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ 00016 00017 /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ 00018 00019 #include "ftdefs.h" 00020 00021 ulong ft_min_word_len=4; 00022 ulong ft_max_word_len=HA_FT_MAXCHARLEN; 00023 ulong ft_query_expansion_limit=5; 00024 char ft_boolean_syntax[]="+ -><()~*:\"\"&|"; 00025 00026 const HA_KEYSEG ft_keysegs[FT_SEGS]={ 00027 { 00028 0, /* charset */ 00029 HA_FT_WLEN, /* start */ 00030 0, /* null_pos */ 00031 0, /* Bit pos */ 00032 HA_VAR_LENGTH_PART | HA_PACK_KEY, /* flag */ 00033 HA_FT_MAXBYTELEN, /* length */ 00034 HA_KEYTYPE_VARTEXT2, /* type */ 00035 63, /* language (will be overwritten) */ 00036 0, /* null_bit */ 00037 2, 0, 0 /* bit_start, bit_end, bit_length */ 00038 }, 00039 { 00040 /* 00041 Note, this (and the last HA_KEYTYPE_END) segment should NOT 00042 be packed in any way, otherwise w_search() won't be able to 00043 update key entry 'in vivo' 00044 */ 00045 0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, HA_FT_WTYPE, 63, 0, 0, 0, 0 00046 } 00047 }; 00048 00049 const struct _ft_vft _ft_vft_nlq = { 00050 ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search, 00051 ft_nlq_get_relevance, ft_nlq_reinit_search 00052 }; 00053 const struct _ft_vft _ft_vft_boolean = { 00054 ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search, 00055 ft_boolean_get_relevance, ft_boolean_reinit_search 00056 }; 00057 00058 00059 FT_INFO *ft_init_search(uint flags, void *info, uint keynr, 00060 byte *query, uint query_len, CHARSET_INFO *cs, 00061 byte *record) 00062 { 00063 FT_INFO *res; 00064 if (flags & FT_BOOL) 00065 res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs); 00066 else 00067 res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, 00068 record); 00069 return res; 00070 } 00071 00072 const char *ft_stopword_file = 0; 00073 const char *ft_precompiled_stopwords[] = { 00074 00075 #ifdef COMPILE_STOPWORDS_IN 00076 00077 /* This particular stopword list was taken from SMART distribution 00078 ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z 00079 it was slightly modified to my taste, though 00080 */ 00081 00082 "a's", 00083 "able", 00084 "about", 00085 "above", 00086 "according", 00087 "accordingly", 00088 "across", 00089 "actually", 00090 "after", 00091 "afterwards", 00092 "again", 00093 "against", 00094 "ain't", 00095 "all", 00096 "allow", 00097 "allows", 00098 "almost", 00099 "alone", 00100 "along", 00101 "already", 00102 "also", 00103 "although", 00104 "always", 00105 "am", 00106 "among", 00107 "amongst", 00108 "an", 00109 "and", 00110 "another", 00111 "any", 00112 "anybody", 00113 "anyhow", 00114 "anyone", 00115 "anything", 00116 "anyway", 00117 "anyways", 00118 "anywhere", 00119 "apart", 00120 "appear", 00121 "appreciate", 00122 "appropriate", 00123 "are", 00124 "aren't", 00125 "around", 00126 "as", 00127 "aside", 00128 "ask", 00129 "asking", 00130 "associated", 00131 "at", 00132 "available", 00133 "away", 00134 "awfully", 00135 "be", 00136 "became", 00137 "because", 00138 "become", 00139 "becomes", 00140 "becoming", 00141 "been", 00142 "before", 00143 "beforehand", 00144 "behind", 00145 "being", 00146 "believe", 00147 "below", 00148 "beside", 00149 "besides", 00150 "best", 00151 "better", 00152 "between", 00153 "beyond", 00154 "both", 00155 "brief", 00156 "but", 00157 "by", 00158 "c'mon", 00159 "c's", 00160 "came", 00161 "can", 00162 "can't", 00163 "cannot", 00164 "cant", 00165 "cause", 00166 "causes", 00167 "certain", 00168 "certainly", 00169 "changes", 00170 "clearly", 00171 "co", 00172 "com", 00173 "come", 00174 "comes", 00175 "concerning", 00176 "consequently", 00177 "consider", 00178 "considering", 00179 "contain", 00180 "containing", 00181 "contains", 00182 "corresponding", 00183 "could", 00184 "couldn't", 00185 "course", 00186 "currently", 00187 "definitely", 00188 "described", 00189 "despite", 00190 "did", 00191 "didn't", 00192 "different", 00193 "do", 00194 "does", 00195 "doesn't", 00196 "doing", 00197 "don't", 00198 "done", 00199 "down", 00200 "downwards", 00201 "during", 00202 "each", 00203 "edu", 00204 "eg", 00205 "eight", 00206 "either", 00207 "else", 00208 "elsewhere", 00209 "enough", 00210 "entirely", 00211 "especially", 00212 "et", 00213 "etc", 00214 "even", 00215 "ever", 00216 "every", 00217 "everybody", 00218 "everyone", 00219 "everything", 00220 "everywhere", 00221 "ex", 00222 "exactly", 00223 "example", 00224 "except", 00225 "far", 00226 "few", 00227 "fifth", 00228 "first", 00229 "five", 00230 "followed", 00231 "following", 00232 "follows", 00233 "for", 00234 "former", 00235 "formerly", 00236 "forth", 00237 "four", 00238 "from", 00239 "further", 00240 "furthermore", 00241 "get", 00242 "gets", 00243 "getting", 00244 "given", 00245 "gives", 00246 "go", 00247 "goes", 00248 "going", 00249 "gone", 00250 "got", 00251 "gotten", 00252 "greetings", 00253 "had", 00254 "hadn't", 00255 "happens", 00256 "hardly", 00257 "has", 00258 "hasn't", 00259 "have", 00260 "haven't", 00261 "having", 00262 "he", 00263 "he's", 00264 "hello", 00265 "help", 00266 "hence", 00267 "her", 00268 "here", 00269 "here's", 00270 "hereafter", 00271 "hereby", 00272 "herein", 00273 "hereupon", 00274 "hers", 00275 "herself", 00276 "hi", 00277 "him", 00278 "himself", 00279 "his", 00280 "hither", 00281 "hopefully", 00282 "how", 00283 "howbeit", 00284 "however", 00285 "i'd", 00286 "i'll", 00287 "i'm", 00288 "i've", 00289 "ie", 00290 "if", 00291 "ignored", 00292 "immediate", 00293 "in", 00294 "inasmuch", 00295 "inc", 00296 "indeed", 00297 "indicate", 00298 "indicated", 00299 "indicates", 00300 "inner", 00301 "insofar", 00302 "instead", 00303 "into", 00304 "inward", 00305 "is", 00306 "isn't", 00307 "it", 00308 "it'd", 00309 "it'll", 00310 "it's", 00311 "its", 00312 "itself", 00313 "just", 00314 "keep", 00315 "keeps", 00316 "kept", 00317 "know", 00318 "knows", 00319 "known", 00320 "last", 00321 "lately", 00322 "later", 00323 "latter", 00324 "latterly", 00325 "least", 00326 "less", 00327 "lest", 00328 "let", 00329 "let's", 00330 "like", 00331 "liked", 00332 "likely", 00333 "little", 00334 "look", 00335 "looking", 00336 "looks", 00337 "ltd", 00338 "mainly", 00339 "many", 00340 "may", 00341 "maybe", 00342 "me", 00343 "mean", 00344 "meanwhile", 00345 "merely", 00346 "might", 00347 "more", 00348 "moreover", 00349 "most", 00350 "mostly", 00351 "much", 00352 "must", 00353 "my", 00354 "myself", 00355 "name", 00356 "namely", 00357 "nd", 00358 "near", 00359 "nearly", 00360 "necessary", 00361 "need", 00362 "needs", 00363 "neither", 00364 "never", 00365 "nevertheless", 00366 "new", 00367 "next", 00368 "nine", 00369 "no", 00370 "nobody", 00371 "non", 00372 "none", 00373 "noone", 00374 "nor", 00375 "normally", 00376 "not", 00377 "nothing", 00378 "novel", 00379 "now", 00380 "nowhere", 00381 "obviously", 00382 "of", 00383 "off", 00384 "often", 00385 "oh", 00386 "ok", 00387 "okay", 00388 "old", 00389 "on", 00390 "once", 00391 "one", 00392 "ones", 00393 "only", 00394 "onto", 00395 "or", 00396 "other", 00397 "others", 00398 "otherwise", 00399 "ought", 00400 "our", 00401 "ours", 00402 "ourselves", 00403 "out", 00404 "outside", 00405 "over", 00406 "overall", 00407 "own", 00408 "particular", 00409 "particularly", 00410 "per", 00411 "perhaps", 00412 "placed", 00413 "please", 00414 "plus", 00415 "possible", 00416 "presumably", 00417 "probably", 00418 "provides", 00419 "que", 00420 "quite", 00421 "qv", 00422 "rather", 00423 "rd", 00424 "re", 00425 "really", 00426 "reasonably", 00427 "regarding", 00428 "regardless", 00429 "regards", 00430 "relatively", 00431 "respectively", 00432 "right", 00433 "said", 00434 "same", 00435 "saw", 00436 "say", 00437 "saying", 00438 "says", 00439 "second", 00440 "secondly", 00441 "see", 00442 "seeing", 00443 "seem", 00444 "seemed", 00445 "seeming", 00446 "seems", 00447 "seen", 00448 "self", 00449 "selves", 00450 "sensible", 00451 "sent", 00452 "serious", 00453 "seriously", 00454 "seven", 00455 "several", 00456 "shall", 00457 "she", 00458 "should", 00459 "shouldn't", 00460 "since", 00461 "six", 00462 "so", 00463 "some", 00464 "somebody", 00465 "somehow", 00466 "someone", 00467 "something", 00468 "sometime", 00469 "sometimes", 00470 "somewhat", 00471 "somewhere", 00472 "soon", 00473 "sorry", 00474 "specified", 00475 "specify", 00476 "specifying", 00477 "still", 00478 "sub", 00479 "such", 00480 "sup", 00481 "sure", 00482 "t's", 00483 "take", 00484 "taken", 00485 "tell", 00486 "tends", 00487 "th", 00488 "than", 00489 "thank", 00490 "thanks", 00491 "thanx", 00492 "that", 00493 "that's", 00494 "thats", 00495 "the", 00496 "their", 00497 "theirs", 00498 "them", 00499 "themselves", 00500 "then", 00501 "thence", 00502 "there", 00503 "there's", 00504 "thereafter", 00505 "thereby", 00506 "therefore", 00507 "therein", 00508 "theres", 00509 "thereupon", 00510 "these", 00511 "they", 00512 "they'd", 00513 "they'll", 00514 "they're", 00515 "they've", 00516 "think", 00517 "third", 00518 "this", 00519 "thorough", 00520 "thoroughly", 00521 "those", 00522 "though", 00523 "three", 00524 "through", 00525 "throughout", 00526 "thru", 00527 "thus", 00528 "to", 00529 "together", 00530 "too", 00531 "took", 00532 "toward", 00533 "towards", 00534 "tried", 00535 "tries", 00536 "truly", 00537 "try", 00538 "trying", 00539 "twice", 00540 "two", 00541 "un", 00542 "under", 00543 "unfortunately", 00544 "unless", 00545 "unlikely", 00546 "until", 00547 "unto", 00548 "up", 00549 "upon", 00550 "us", 00551 "use", 00552 "used", 00553 "useful", 00554 "uses", 00555 "using", 00556 "usually", 00557 "value", 00558 "various", 00559 "very", 00560 "via", 00561 "viz", 00562 "vs", 00563 "want", 00564 "wants", 00565 "was", 00566 "wasn't", 00567 "way", 00568 "we", 00569 "we'd", 00570 "we'll", 00571 "we're", 00572 "we've", 00573 "welcome", 00574 "well", 00575 "went", 00576 "were", 00577 "weren't", 00578 "what", 00579 "what's", 00580 "whatever", 00581 "when", 00582 "whence", 00583 "whenever", 00584 "where", 00585 "where's", 00586 "whereafter", 00587 "whereas", 00588 "whereby", 00589 "wherein", 00590 "whereupon", 00591 "wherever", 00592 "whether", 00593 "which", 00594 "while", 00595 "whither", 00596 "who", 00597 "who's", 00598 "whoever", 00599 "whole", 00600 "whom", 00601 "whose", 00602 "why", 00603 "will", 00604 "willing", 00605 "wish", 00606 "with", 00607 "within", 00608 "without", 00609 "won't", 00610 "wonder", 00611 "would", 00612 "would", 00613 "wouldn't", 00614 "yes", 00615 "yet", 00616 "you", 00617 "you'd", 00618 "you'll", 00619 "you're", 00620 "you've", 00621 "your", 00622 "yours", 00623 "yourself", 00624 "yourselves", 00625 "zero", 00626 #endif 00627 00628 NULL }; 00629 00630 static int ft_default_parser_parse(MYSQL_FTPARSER_PARAM *param) 00631 { 00632 return param->mysql_parse(param, param->doc, param->length); 00633 } 00634 00635 struct st_mysql_ftparser ft_default_parser= 00636 { 00637 MYSQL_FTPARSER_INTERFACE_VERSION, ft_default_parser_parse, 0, 0 00638 }; 00639
1.4.7

