00001 /* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB 00002 00003 This program is free software; you can redistribute it and/or modify 00004 it under the terms of the GNU General Public License as published by 00005 the Free Software Foundation; either version 2 of the License, or 00006 (at your option) any later version. 00007 00008 This program is distributed in the hope that it will be useful, 00009 but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00011 GNU General Public License for more details. 00012 00013 You should have received a copy of the GNU General Public License 00014 along with this program; if not, write to the Free Software 00015 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ 00016 00017 /* Written by Sergei A. Golubchik, who has a shared copyright to this code 00018 added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ 00019 00020 #include "ftdefs.h" 00021 #include <my_getopt.h> 00022 00023 static void usage(); 00024 static void complain(int val); 00025 static my_bool get_one_option(int, const struct my_option *, char *); 00026 00027 static int count=0, stats=0, dump=0, lstats=0; 00028 static my_bool verbose; 00029 static char *query=NULL; 00030 static uint lengths[256]; 00031 00032 #define MAX_LEN (HA_FT_MAXBYTELEN+10) 00033 #define HOW_OFTEN_TO_WRITE 10000 00034 00035 static struct my_option my_long_options[] = 00036 { 00037 {"help", 'h', "Display help and exit.", 00038 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, 00039 {"help", '?', "Synonym for -h.", 00040 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, 00041 {"count", 'c', "Calculate per-word stats (counts and global weights).", 00042 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, 00043 {"dump", 'd', "Dump index (incl. data offsets and word weights).", 00044 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, 00045 {"length", 'l', "Report length distribution.", 00046 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, 00047 {"stats", 's', "Report global stats.", 00048 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, 00049 {"verbose", 'v', "Be verbose.", 00050 (gptr*) &verbose, (gptr*) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, 00051 { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} 00052 }; 00053 00054 00055 int main(int argc,char *argv[]) 00056 { 00057 int error=0, subkeys; 00058 uint keylen, keylen2=0, inx, doc_cnt=0; 00059 float weight= 1.0; 00060 double gws, min_gws=0, avg_gws=0; 00061 MI_INFO *info; 00062 char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN]; 00063 ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0; 00064 struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ 00065 00066 MY_INIT(argv[0]); 00067 if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) 00068 exit(error); 00069 if (count || dump) 00070 verbose=0; 00071 if (!count && !dump && !lstats && !query) 00072 stats=1; 00073 00074 if (verbose) 00075 setbuf(stdout,NULL); 00076 00077 if (argc < 2) 00078 usage(); 00079 00080 { 00081 char *end; 00082 inx= (uint) strtoll(argv[1], &end, 10); 00083 if (*end) 00084 usage(); 00085 } 00086 00087 init_key_cache(dflt_key_cache,MI_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0); 00088 00089 if (!(info=mi_open(argv[0], O_RDONLY, 00090 HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) 00091 { 00092 error=my_errno; 00093 goto err; 00094 } 00095 00096 *buf2=0; 00097 aio->info=info; 00098 00099 if ((inx >= info->s->base.keys) || 00100 !(info->s->keyinfo[inx].flag & HA_FULLTEXT)) 00101 { 00102 printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename); 00103 goto err; 00104 } 00105 00106 mi_lock_database(info, F_EXTRA_LCK); 00107 00108 info->lastpos= HA_OFFSET_ERROR; 00109 info->update|= HA_STATE_PREV_FOUND; 00110 00111 while (!(error=mi_rnext(info,NULL,inx))) 00112 { 00113 keylen=*(info->lastkey); 00114 00115 subkeys=ft_sintXkorr(info->lastkey+keylen+1); 00116 if (subkeys >= 0) 00117 weight=*(float*)&subkeys; 00118 00119 #ifdef HAVE_SNPRINTF 00120 snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1); 00121 #else 00122 sprintf(buf,"%.*s",(int) keylen,info->lastkey+1); 00123 #endif 00124 my_casedn_str(default_charset_info,buf); 00125 total++; 00126 lengths[keylen]++; 00127 00128 if (count || stats) 00129 { 00130 doc_cnt++; 00131 if (strcmp(buf, buf2)) 00132 { 00133 if (*buf2) 00134 { 00135 uniq++; 00136 avg_gws+=gws=GWS_IN_USE; 00137 if (count) 00138 printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); 00139 if (maxlen<keylen2) 00140 { 00141 maxlen=keylen2; 00142 strmov(buf_maxlen, buf2); 00143 } 00144 if (max_doc_cnt < doc_cnt) 00145 { 00146 max_doc_cnt=doc_cnt; 00147 strmov(buf_min_gws, buf2); 00148 min_gws=gws; 00149 } 00150 } 00151 strmov(buf2, buf); 00152 keylen2=keylen; 00153 doc_cnt=0; 00154 } 00155 } 00156 if (dump) 00157 { 00158 if (subkeys>=0) 00159 printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf); 00160 else 00161 printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf); 00162 } 00163 if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) 00164 printf("%10ld\r",total); 00165 } 00166 mi_lock_database(info, F_UNLCK); 00167 00168 if (count || stats) 00169 { 00170 doc_cnt++; 00171 if (*buf2) 00172 { 00173 uniq++; 00174 avg_gws+=gws=GWS_IN_USE; 00175 if (count) 00176 printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); 00177 if (maxlen<keylen2) 00178 { 00179 maxlen=keylen2; 00180 strmov(buf_maxlen, buf2); 00181 } 00182 if (max_doc_cnt < doc_cnt) 00183 { 00184 max_doc_cnt=doc_cnt; 00185 strmov(buf_min_gws, buf2); 00186 min_gws=gws; 00187 } 00188 } 00189 } 00190 00191 if (stats) 00192 { 00193 count=0; 00194 for (inx=0;inx<256;inx++) 00195 { 00196 count+=lengths[inx]; 00197 if ((ulong) count >= total/2) 00198 break; 00199 } 00200 printf("Total rows: %lu\nTotal words: %lu\n" 00201 "Unique words: %lu\nLongest word: %lu chars (%s)\n" 00202 "Median length: %u\n" 00203 "Average global weight: %f\n" 00204 "Most common word: %lu times, weight: %f (%s)\n", 00205 (long) info->state->records, total, uniq, maxlen, buf_maxlen, 00206 inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws); 00207 } 00208 if (lstats) 00209 { 00210 count=0; 00211 for (inx=0; inx<256; inx++) 00212 { 00213 count+=lengths[inx]; 00214 if (count && lengths[inx]) 00215 printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx, 00216 (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count, 00217 100.0*count/total); 00218 } 00219 } 00220 00221 err: 00222 if (error && error != HA_ERR_END_OF_FILE) 00223 printf("got error %d\n",my_errno); 00224 if (info) 00225 mi_close(info); 00226 return 0; 00227 } 00228 00229 00230 static my_bool 00231 get_one_option(int optid, const struct my_option *opt __attribute__((unused)), 00232 char *argument __attribute__((unused))) 00233 { 00234 switch(optid) { 00235 case 'd': 00236 dump=1; 00237 complain(count || query); 00238 break; 00239 case 's': 00240 stats=1; 00241 complain(query!=0); 00242 break; 00243 case 'c': 00244 count= 1; 00245 complain(dump || query); 00246 break; 00247 case 'l': 00248 lstats=1; 00249 complain(query!=0); 00250 break; 00251 case '?': 00252 case 'h': 00253 usage(); 00254 } 00255 return 0; 00256 } 00257 00258 #include <help_start.h> 00259 00260 static void usage() 00261 { 00262 printf("Use: myisam_ftdump <table_name> <index_num>\n"); 00263 my_print_help(my_long_options); 00264 my_print_variables(my_long_options); 00265 NETWARE_SET_SCREEN_MODE(1); 00266 exit(1); 00267 } 00268 00269 #include <help_end.h> 00270 00271 static void complain(int val) /* Kinda assert :-) */ 00272 { 00273 if (val) 00274 { 00275 printf("You cannot use these options together!\n"); 00276 exit(1); 00277 } 00278 }
1.4.7

