Changeset 1924
- Timestamp:
- 04/03/07 23:30:21 (1 year ago)
- Files:
-
- libswish3/trunk/src/libswish3/Makefile.am (modified) (1 diff)
- libswish3/trunk/src/libswish3/config.c (modified) (3 diffs)
- libswish3/trunk/src/libswish3/libswish3.h (modified) (3 diffs)
- libswish3/trunk/src/libswish3/mem.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/parser.c (modified) (7 diffs)
- libswish3/trunk/src/libswish3/swish.c (added)
- libswish3/trunk/src/libswish3/words.c (modified) (8 diffs)
- libswish3/trunk/src/swish_lint.c (modified) (11 diffs)
- libswish3/trunk/src/swish_words.c (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/src/libswish3/Makefile.am
r1913 r1924 24 24 times.c \ 25 25 words.c \ 26 swish.c \ 26 27 $(myheaders) 27 28 libswish3/trunk/src/libswish3/config.c
r1923 r1924 58 58 static void add_multi_node_to_cv(xmlNode * node, swish_ConfigValue * cv); 59 59 60 static int meta_abbrev_len = 625; /* 26 * 26 - 1 */61 static char * meta_abbrev[] = {62 "Aa", "Ab", "Ac", "Ad", "Ae", "Af", "Ag", "Ah", "Ai", "Aj", "Ak", "Al", "Am",63 "An", "Ao", "Ap", "Aq", "Ar", "As", "At", "Au", "Av", "Aw", "Ax", "Ay", "Az",64 "Ba", "Bb", "Bc", "Bd", "Be", "Bf", "Bg", "Bh", "Bi", "Bj", "Bk", "Bl", "Bm",65 "Bn", "Bo", "Bp", "Bq", "Br", "Bs", "Bt", "Bu", "Bv", "Bw", "Bx", "By", "Bz",66 "Ca", "Cb", "Cc", "Cd", "Ce", "Cf", "Cg", "Ch", "Ci", "Cj", "Ck", "Cl", "Cm",67 "Cn", "Co", "Cp", "Cq", "Cr", "Cs", "Ct", "Cu", "Cv", "Cw", "Cx", "Cy", "Cz",68 "Da", "Db", "Dc", "Dd", "De", "Df", "Dg", "Dh", "Di", "Dj", "Dk", "Dl", "Dm",69 "Dn", "Do", "Dp", "Dq", "Dr", "Ds", "Dt", "Du", "Dv", "Dw", "Dx", "Dy", "Dz",70 "Ea", "Eb", "Ec", "Ed", "Ee", "Ef", "Eg", "Eh", "Ei", "Ej", "Ek", "El", "Em",71 "En", "Eo", "Ep", "Eq", "Er", "Es", "Et", "Eu", "Ev", "Ew", "Ex", "Ey", "Ez",72 "Fa", "Fb", "Fc", "Fd", "Fe", "Ff", "Fg", "Fh", "Fi", "Fj", "Fk", "Fl", "Fm",73 "Fn", "Fo", "Fp", "Fq", "Fr", "Fs", "Ft", "Fu", "Fv", "Fw", "Fx", "Fy", "Fz",74 "Ga", "Gb", "Gc", "Gd", "Ge", "Gf", "Gg", "Gh", "Gi", "Gj", "Gk", "Gl", "Gm",75 "Gn", "Go", "Gp", "Gq", "Gr", "Gs", "Gt", "Gu", "Gv", "Gw", "Gx", "Gy", "Gz",76 "Ha", "Hb", "Hc", "Hd", "He", "Hf", "Hg", "Hh", "Hi", "Hj", "Hk", "Hl", "Hm",77 "Hn", "Ho", "Hp", "Hq", "Hr", "Hs", "Ht", "Hu", "Hv", "Hw", "Hx", "Hy", "Hz",78 "Ia", "Ib", "Ic", "Id", "Ie", "If", "Ig", "Ih", "Ii", "Ij", "Ik", "Il", "Im",79 "In", "Io", "Ip", "Iq", "Ir", "Is", "It", "Iu", "Iv", "Iw", "Ix", "Iy", "Iz",80 "Ja", "Jb", "Jc", "Jd", "Je", "Jf", "Jg", "Jh", "Ji", "Jj", "Jk", "Jl", "Jm",81 "Jn", "Jo", "Jp", "Jq", "Jr", "Js", "Jt", "Ju", "Jv", "Jw", "Jx", "Jy", "Jz",82 "Ka", "Kb", "Kc", "Kd", "Ke", "Kf", "Kg", "Kh", "Ki", "Kj", "Kk", "Kl", "Km",83 "Kn", "Ko", "Kp", "Kq", "Kr", "Ks", "Kt", "Ku", "Kv", "Kw", "Kx", "Ky", "Kz",84 "La", "Lb", "Lc", "Ld", "Le", "Lf", "Lg", "Lh", 
"Li", "Lj", "Lk", "Ll", "Lm",85 "Ln", "Lo", "Lp", "Lq", "Lr", "Ls", "Lt", "Lu", "Lv", "Lw", "Lx", "Ly", "Lz",86 "Ma", "Mb", "Mc", "Md", "Me", "Mf", "Mg", "Mh", "Mi", "Mj", "Mk", "Ml", "Mm",87 "Mn", "Mo", "Mp", "Mq", "Mr", "Ms", "Mt", "Mu", "Mv", "Mw", "Mx", "My", "Mz",88 "Na", "Nb", "Nc", "Nd", "Ne", "Nf", "Ng", "Nh", "Ni", "Nj", "Nk", "Nl", "Nm",89 "Nn", "No", "Np", "Nq", "Nr", "Ns", "Nt", "Nu", "Nv", "Nw", "Nx", "Ny", "Nz",90 "Oa", "Ob", "Oc", "Od", "Oe", "Of", "Og", "Oh", "Oi", "Oj", "Ok", "Ol", "Om",91 "On", "Oo", "Op", "Oq", "Or", "Os", "Ot", "Ou", "Ov", "Ow", "Ox", "Oy", "Oz",92 "Pa", "Pb", "Pc", "Pd", "Pe", "Pf", "Pg", "Ph", "Pi", "Pj", "Pk", "Pl", "Pm",93 "Pn", "Po", "Pp", "Pq", "Pr", "Ps", "Pt", "Pu", "Pv", "Pw", "Px", "Py", "Pz",94 "Qa", "Qb", "Qc", "Qd", "Qe", "Qf", "Qg", "Qh", "Qi", "Qj", "Qk", "Ql", "Qm",95 "Qn", "Qo", "Qp", "Qq", "Qr", "Qs", "Qt", "Qu", "Qv", "Qw", "Qx", "Qy", "Qz",96 "Ra", "Rb", "Rc", "Rd", "Re", "Rf", "Rg", "Rh", "Ri", "Rj", "Rk", "Rl", "Rm",97 "Rn", "Ro", "Rp", "Rq", "Rr", "Rs", "Rt", "Ru", "Rv", "Rw", "Rx", "Ry", "Rz",98 "Sa", "Sb", "Sc", "Sd", "Se", "Sf", "Sg", "Sh", "Si", "Sj", "Sk", "Sl", "Sm",99 "Sn", "So", "Sp", "Sq", "Sr", "Ss", "St", "Su", "Sv", "Sw", "Sx", "Sy", "Sz",100 "Ta", "Tb", "Tc", "Td", "Te", "Tf", "Tg", "Th", "Ti", "Tj", "Tk", "Tl", "Tm",101 "Tn", "To", "Tp", "Tq", "Tr", "Ts", "Tt", "Tu", "Tv", "Tw", "Tx", "Ty", "Tz",102 "Ua", "Ub", "Uc", "Ud", "Ue", "Uf", "Ug", "Uh", "Ui", "Uj", "Uk", "Ul", "Um",103 "Un", "Uo", "Up", "Uq", "Ur", "Us", "Ut", "Uu", "Uv", "Uw", "Ux", "Uy", "Uz",104 "Va", "Vb", "Vc", "Vd", "Ve", "Vf", "Vg", "Vh", "Vi", "Vj", "Vk", "Vl", "Vm",105 "Vn", "Vo", "Vp", "Vq", "Vr", "Vs", "Vt", "Vu", "Vv", "Vw", "Vx", "Vy", "Vz",106 "Wa", "Wb", "Wc", "Wd", "We", "Wf", "Wg", "Wh", "Wi", "Wj", "Wk", "Wl", "Wm",107 "Wn", "Wo", "Wp", "Wq", "Wr", "Ws", "Wt", "Wu", "Wv", "Ww", "Wx", "Wy", "Wz",108 "Xa", "Xb", "Xc", "Xd", "Xe", "Xf", "Xg", "Xh", "Xi", "Xj", "Xk", "Xl", "Xm",109 "Xn", "Xo", "Xp", "Xq", "Xr", "Xs", 
"Xt", "Xu", "Xv", "Xw", "Xx", "Xy", "Xz",110 "Ya", "Yb", "Yc", "Yd", "Ye", "Yf", "Yg", "Yh", "Yi", "Yj", "Yk", "Yl", "Ym",111 "Yn", "Yo", "Yp", "Yq", "Yr", "Ys", "Yt", "Yu", "Yv", "Yw", "Yx", "Yy", "Yz",112 "Za", "Zb", "Zc", "Zd", "Ze", "Zf", "Zg", "Zh", "Zi", "Zj", "Zk", "Zl", "Zm",113 "Zn", "Zo", "Zp", "Zq", "Zr", "Zs", "Zt", "Zu", "Zv", "Zw", "Zx", "Zy", "Zz"114 };115 116 60 117 61 static void … … 160 104 161 105 swish_xfree(config); 162 163 swish_mem_debug(); 164 } 165 166 167 168 /* init memory stuff, get global debug and env vars, and verify locale is correct */ 106 } 107 108 109 110 /* init memory stuff, env vars, and verify locale is correct */ 169 111 170 112 swish_Config * … … 176 118 xmlHashTablePtr c, metas, parsers, index, prop, alias, parsewords; 177 119 178 179 /* init memory */180 swish_init_memory();181 182 120 /* verify locale */ 183 121 swish_verify_utf8_locale(); 184 185 /* set global vars */186 setenv("SWISH_DEBUG", "0", 0);187 SWISH_DEBUG = strtol(getenv("SWISH_DEBUG"), (char **) NULL, 10);188 122 189 123 if (SWISH_DEBUG) libswish3/trunk/src/libswish3/libswish3.h
r1923 r1924 127 127 128 128 129 /* global init and cleanup functions -- call these in every linking program */ 130 void swish_init(); 131 void swish_cleanup(); 132 129 133 130 134 /* utils */ … … 351 355 352 356 /* word functions */ 357 void swish_init_words(); 353 358 swish_WordList * swish_init_WordList(); 354 359 void swish_free_WordList(swish_WordList * list); … … 360 365 int base_word_pos, 361 366 int offset ); 367 368 swish_WordList * swish_tokenize_utf8_string( 369 xmlChar * str, 370 xmlChar * metaname, 371 xmlChar * context, 372 int maxwordlen, 373 int minwordlen, 374 int base_word_pos, 375 int offset ); 376 377 swish_WordList * swish_tokenize_ascii_string( 378 xmlChar * str, 379 xmlChar * metaname, 380 xmlChar * context, 381 int maxwordlen, 382 int minwordlen, 383 int base_word_pos, 384 int offset ); 385 386 362 387 size_t swish_add_to_wordlist( swish_WordList * list, 363 388 xmlChar * word, libswish3/trunk/src/libswish3/mem.c
r1923 r1924 37 37 { 38 38 memcount = 0; 39 40 if (SWISH_DEBUG)41 return;42 39 43 /* init the global env vars, but don't override if already set */44 setenv("SWISH_DEBUG", "0", 0);45 SWISH_DEBUG = strtol(getenv("SWISH_DEBUG"), (char**)NULL, 10);46 40 } 47 41 libswish3/trunk/src/libswish3/parser.c
r1921 r1924 56 56 57 57 extern int errno; 58 59 /* 60 * SWISH_DEBUG is set globally here in swish_parse_stdin() and/or 61 * swish_parse_file() we define it as extern in other .c files 62 */ 63 64 int SWISH_DEBUG = 0; 58 extern int SWISH_DEBUG; 59 65 60 int SWISH_PARSER_ERROR = 0; 66 61 int SWISH_PARSER_WARNING = 0; … … 158 153 swish_init_parser() 159 154 { 160 xmlInitParser(); 161 162 /* global var that scripts can check to determine what version of Swish they are 163 * using. the second 0 indicates that it will not override it if already set */ 164 setenv("SWISH3", "1", 0); 165 setenv("SWISH_DEBUG", "0", 0); 166 155 xmlInitParser(); 167 156 xmlSubstituteEntitiesDefault(1); /* resolve text entities */ 157 get_env_vars(); 168 158 } 169 159 … … 1196 1186 { 1197 1187 /* init the global env vars, but don't override if already set */ 1198 setenv("SWISH_DEBUG", "0", 0);1199 SWISH_DEBUG = (int)strtol(getenv("SWISH_DEBUG"), (char**)NULL, 10);1200 1188 1201 1189 setenv("SWISH_PARSER_ERROR", "0", 0); … … 1210 1198 if (SWISH_DEBUG) 1211 1199 { 1212 SWISH_PARSER_ERROR = 1;1213 SWISH_PARSER_WARNING = 1;1214 SWISH_PARSER_FATAL = 1;1200 SWISH_PARSER_ERROR = SWISH_DEBUG; 1201 SWISH_PARSER_WARNING = SWISH_DEBUG; 1202 SWISH_PARSER_FATAL = SWISH_DEBUG; 1215 1203 } 1216 1204 } … … 1241 1229 min_headers = 2; 1242 1230 1243 get_env_vars();1244 1245 1231 ln = swish_xmalloc(SWISH_MAXSTRLEN + 1); 1246 1232 head_buf = xmlBufferCreateSize((SWISH_MAX_HEADERS * SWISH_MAXSTRLEN) + SWISH_MAX_HEADERS); … … 1402 1388 1403 1389 1404 get_env_vars();1405 1406 1390 head = buf_to_head(buf); 1407 1391 … … 1461 1445 double curTime = swish_time_elapsed(); 1462 1446 char *etime; 1463 1464 get_env_vars();1465 1447 1466 1448 swish_ParseData *parse_data = init_parse_data(config, user_data); libswish3/trunk/src/libswish3/words.c
r1923 r1924 40 40 static int is_ignore_word(wint_t c); 41 41 static int bytes_in_char(wint_t c); 42 static void set_debug(); 43 44 /**********************************************************************************************/ 45 46 /* we have our own set_debug here because we might be calling these tokenizing 47 functions without ever calling swish_init_config() 48 */ 49 50 static void set_debug() 51 { 52 if (SWISH_DEBUG) 42 static void make_ascii_tables(); 43 44 45 static int initialized = 0; 46 47 void 48 swish_init_words() 49 { 50 if (initialized) 53 51 return; 54 52 55 setenv("SWISH_DEBUG", "0", 0); 56 /* init the global env var, but don't override if already set */ 57 SWISH_DEBUG = strtol(getenv("SWISH_DEBUG"), (char**)NULL, 10); 53 make_ascii_tables(); 54 initialized = 1; 58 55 } 59 56 … … 233 230 234 231 235 static swish_WordList *236 tokenize_utf8_string(232 swish_WordList * 233 swish_tokenize_utf8_string( 237 234 xmlChar * str, 238 235 xmlChar * metaname, … … 416 413 *************************************************/ 417 414 418 static int ascii_tables_created = 0;419 415 static char ascii_word_table[128]; 420 416 static char ascii_start_table[128]; … … 443 439 444 440 } 445 ascii_tables_created = 1;446 441 } 447 442 … … 458 453 459 454 460 static swish_WordList *461 tokenize_ascii_string(455 swish_WordList * 456 swish_tokenize_ascii_string( 462 457 xmlChar * str, 463 458 xmlChar * metaname, … … 481 476 if (SWISH_DEBUG > 10) 482 477 swish_debug_msg("parsing string: '%s' into words", str); 483 484 485 /* build tables if this is first time through */486 if (!ascii_tables_created)487 make_ascii_tables();488 478 489 479 … … 638 628 { 639 629 640 set_debug(); /* in case this is called without ever swish_init_config() */ 630 if (!initialized) 631 { 632 swish_warn_err("swish_init_words() was not explicitly called -- initializing...."); 633 swish_init_words(); 634 } 641 635 642 636 if (swish_is_ascii( str )) 643 637 { 644 638 //swish_debug_msg("%s is ascii", 
str); 645 return tokenize_ascii_string( str,639 return swish_tokenize_ascii_string( str, 646 640 metaname, 647 641 context, … … 654 648 { 655 649 //swish_debug_msg("%s is utf8", str); 656 return tokenize_utf8_string( str,650 return swish_tokenize_utf8_string( str, 657 651 metaname, 658 652 context, libswish3/trunk/src/swish_lint.c
r1923 r1924 42 42 int twords = 0; 43 43 44 extern int SWISH_DEBUG; 45 44 46 static struct option longopts[] = 45 47 { … … 47 49 {"debug", required_argument, 0, 'd'}, 48 50 {"help", no_argument, 0, 'h'}, 49 {"version", no_argument, 0, 'v'},50 51 {0, 0, 0, 0} 51 52 }; … … 59 60 void swish_version() 60 61 { 61 printf("libswish3 version %s\n", SWISH_VERSION); 62 printf("libswish3 version %s\n", SWISH_LIB_VERSION); 63 printf("swish version %s\n", SWISH_VERSION); 62 64 } 63 65 … … 68 70 char * descr = "swish_lint is an example program for using SwishParser\n"; 69 71 printf("swish_lint [opts] [- | file(s)]\n"); 70 printf("opts:\n --config conf_file.xml\n --debug \n --help\n --version\n");72 printf("opts:\n --config conf_file.xml\n --debug [lvl]\n --help\n"); 71 73 printf("\n%s\n", descr); 72 74 libxml2_version(); … … 85 87 twords += parse_data->docinfo->nwords; 86 88 87 if ( debug)89 if (SWISH_DEBUG) 88 90 { 89 91 swish_debug_docinfo(parse_data->docinfo); … … 105 107 double startTime = swish_time_elapsed(); 106 108 107 swish_init_parser(); 108 109 swish_Config * config = swish_init_config(); 110 111 /* setting this \after\ make_char_tables() causes weird error... 
112 * 113 * xmlSubstituteEntitiesDefault(1); resolve text entities */ 114 109 xmlChar *config_file = NULL; 110 111 swish_init(); 112 113 swish_Config * config; 115 114 116 115 while ((ch = getopt_long(argc, argv, "c:d:f:h", longopts, &option_index)) != -1) … … 135 134 136 135 //printf("optarg = %s\n", optarg); 137 config = swish_add_config((xmlChar *) optarg, config);136 config_file = swish_xstrdup( optarg ); 138 137 break; 139 138 140 case 'v':141 libxml2_version();142 swish_version();143 break;144 145 139 146 140 case 'd': … … 150 144 err(1, "-d option requires a positive integer as argument\n"); 151 145 152 setenv("SWISH_DEBUG", optarg, 1); 153 debug = (int) strtol(getenv("SWISH_DEBUG"), (char **) NULL, 10); 154 /* printf("debug at level %d\n", debug); */ 155 146 SWISH_DEBUG = (int) strtol(optarg, (char **) NULL, 10); 156 147 break; 157 148 … … 169 160 170 161 } 162 163 config = swish_init_config(); 164 165 if (config_file != NULL) 166 { 167 config = swish_add_config(config_file, config); 168 } 171 169 172 170 i = optind; … … 180 178 } 181 179 182 if ( debug== 20)180 if (SWISH_DEBUG == 20) 183 181 { 184 182 swish_debug_config(config); … … 219 217 220 218 swish_free_config( config ); 221 swish_free_parser(); 222 swish_mem_debug(); 219 220 if (config_file != NULL) 221 swish_xfree(config_file); 222 223 swish_cleanup(); 223 224 224 225 return (0); libswish3/trunk/src/swish_words.c
r1921 r1924 70 70 int max = 255; 71 71 int min = 1; 72 72 73 73 swish_WordList *list; 74 74 … … 122 122 } 123 123 124 swish_init(); /* call after we have set optional debug flag */ 125 124 126 i = optind; 125 127 … … 133 135 134 136 } 137 138 swish_cleanup(); 135 139 136 140 return (0);
