Changeset 2010 for libswish3/trunk/src
- Timestamp:
- 02/10/08 22:26:06 (1 year ago)
- Files:
-
- libswish3/trunk/src/libswish3/analyzer.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/config.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/docinfo.c (modified) (2 diffs)
- libswish3/trunk/src/libswish3/libswish3.h (modified) (6 diffs)
- libswish3/trunk/src/libswish3/parser.c (modified) (24 diffs)
- libswish3/trunk/src/libswish3/swish.c (modified) (2 diffs)
- libswish3/trunk/src/swish_lint.c (modified) (7 diffs)
- libswish3/trunk/src/swish_words.c (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/src/libswish3/analyzer.c
r1952 r2010 30 30 swish_init_analyzer( swish_Config * config ) 31 31 { 32 swish_Analyzer *a = (swish_Analyzer *) swish_xmalloc(sizeof(swish_Analyzer)); 32 swish_Analyzer *a; 33 a = swish_xmalloc(sizeof(swish_Analyzer)); 33 34 34 35 /* TODO get this from config */ libswish3/trunk/src/libswish3/config.c
r1955 r2010 522 522 { 523 523 xmlNode *root = NULL; 524 xmlChar *toptag = "swishconfig";524 xmlChar *toptag = (xmlChar*)"swishconfig"; 525 525 526 526 root = xmlDocGetRootElement(doc); libswish3/trunk/src/libswish3/docinfo.c
r1952 r2010 205 205 swish_xfree(i->mime); 206 206 207 i->mime = swish_get_mime_type( parse_data-> config, i->ext );207 i->mime = swish_get_mime_type( parse_data->s3->config, i->ext ); 208 208 209 209 if (SWISH_DEBUG > 9) … … 213 213 swish_xfree(i->parser); 214 214 215 i->parser = swish_get_parser( parse_data-> config, i->mime );215 i->parser = swish_get_parser( parse_data->s3->config, i->mime ); 216 216 217 217 return 1; libswish3/trunk/src/libswish3/libswish3.h
r2009 r2010 146 146 #endif 147 147 148 typedef struct swish_3 swish_3; 149 typedef struct swish_Token swish_Token; 148 150 typedef struct swish_StringList swish_StringList; 149 151 typedef struct swish_Config swish_Config; … … 169 171 */ 170 172 173 struct swish_3 174 { 175 int ref_cnt; 176 void *stash; 177 swish_Config *config; 178 swish_Analyzer *analyzer; 179 swish_Parser *parser; 180 }; 181 182 struct swish_Token 183 { 184 xmlChar *start_ptr; 185 int tok_bytes; 186 int start; 187 int end; 188 xmlChar *meta; 189 xmlChar *ctxt; 190 unsigned int wpos; 191 unsigned int offset; 192 swish_Analyzer *analyzer; 193 swish_WordList *list; 194 }; 195 171 196 struct swish_StringList 172 197 { … … 286 311 { 287 312 int ref_cnt; // for script bindings 288 swish_Config *config; // config object289 swish_Analyzer *analyzer; // analyzer object290 313 void (*handler)(swish_ParseData*); // handler reference 291 314 void *stash; // for script bindings … … 295 318 struct swish_ParseData 296 319 { 320 swish_3 *s3; // main object 297 321 xmlBufferPtr meta_buf; // tmp MetaName buffer 298 322 xmlBufferPtr prop_buf; // tmp Property buffer 299 323 xmlChar *tag; // current tag name 300 324 swish_DocInfo *docinfo; // document-specific properties 301 swish_Config *config; // global config302 325 unsigned int context_as_meta; // index tokens under all applicable MetaNames 303 326 unsigned int no_index; // toggle flag for special comments … … 312 335 swish_NamedBuffer *properties; // buffer all properties 313 336 swish_NamedBuffer *metanames; // buffer all metanames 314 swish_Analyzer *analyzer; // Analyzer struct 315 void *stash; // for script bindings 316 }; 317 318 /* 319 =cut 320 */ 321 322 /* 323 =head2 Global Functions 324 */ 325 void swish_init(); 326 void swish_cleanup(); 337 }; 338 339 /* 340 =cut 341 */ 342 343 /* 344 =head2 Object Functions 345 */ 346 swish_3 * swish_init_swish3( void (*handler) (swish_ParseData *), void *stash ); 347 void swish_free_swish3( swish_3 *s3 ); 327 348 /* 328 349 =cut … … 436 457 =head2 Parser Functions 437 458 */ 438 swish_Parser * swish_init_parser( swish_Config * config, 439 swish_Analyzer * analyzer, 440 void (*handler) (swish_ParseData *), 441 void *stash 442 ); 443 void swish_free_parser( swish_Parser * parser ); 444 int swish_parse_file( swish_Parser * parser, 445 xmlChar *filename, 446 void * stash ); 447 int swish_parse_fh( swish_Parser * parser, 448 FILE * fh, 449 void * stash ); 450 int swish_parse_buffer( swish_Parser * parser, 451 xmlChar * buf, 452 void * stash ); 459 swish_Parser * swish_init_parser( void (*handler) (swish_ParseData *) ); 460 void swish_free_parser( swish_Parser * parser ); 461 int swish_parse_file( swish_3 * s3, 462 xmlChar *filename); 463 int swish_parse_fh( swish_3 * s3, 464 FILE * fh); 465 int swish_parse_buffer( swish_3 * s3, 466 xmlChar * buf); 453 467 /* 454 468 =cut libswish3/trunk/src/libswish3/parser.c
r2009 r2010 123 123 124 124 static swish_ParseData * 125 init_parse_data(swish_ Config * config, swish_Analyzer * analyzer, void * stash);125 init_parse_data(swish_3 * s3); 126 126 static void free_parse_data(swish_ParseData * parse_data); 127 127 … … 150 150 swish_Parser * 151 151 swish_init_parser( 152 swish_Config * config, 153 swish_Analyzer * analyzer, 154 void (*handler) (swish_ParseData *), 155 void * stash 152 void (*handler) (swish_ParseData *) 156 153 ) 157 154 { 158 155 swish_Parser * p = (swish_Parser*) swish_xmalloc(sizeof(swish_Parser)); 159 p->config = config;160 p->analyzer = analyzer;161 p->stash = stash;162 156 p->handler = handler; 163 157 p->ref_cnt = 0; … … 296 290 297 291 /* change our internal name for this tag if it is aliased in config */ 298 alias = swish_get_config_value(parse_data-> config, (xmlChar*)SWISH_ALIAS, swishtag);292 alias = swish_get_config_value(parse_data->s3->config, (xmlChar*)SWISH_ALIAS, swishtag); 299 293 if (alias) 300 294 { … … 351 345 } 352 346 353 if (parse_data-> analyzer->tokenize)347 if (parse_data->s3->analyzer->tokenize) 354 348 { 355 349 … … 450 444 451 445 /* set property if this tag is configured for it */ 452 if (swish_config_value_exists(parse_data-> config, (xmlChar*)SWISH_PROP, parse_data->tag))446 if (swish_config_value_exists(parse_data->s3->config, (xmlChar*)SWISH_PROP, parse_data->tag)) 453 447 { 454 448 if (SWISH_DEBUG == SWISH_DEBUG_PARSER) … … 464 458 465 459 /* likewise for metastack */ 466 if (swish_config_value_exists(parse_data-> config, (xmlChar*)SWISH_META, parse_data->tag))460 if (swish_config_value_exists(parse_data->s3->config, (xmlChar*)SWISH_META, parse_data->tag)) 467 461 { 468 462 if (SWISH_DEBUG == SWISH_DEBUG_PARSER) … … 736 730 737 731 static swish_ParseData * 738 init_parse_data( swish_Config * config, swish_Analyzer * analyzer, void * stash)732 init_parse_data( swish_3 * s3 ) 739 733 { 740 734 … … 744 738 swish_ParseData *ptr = (swish_ParseData *) swish_xmalloc(sizeof(swish_ParseData)); 745 739 746 ptr->s tash = stash;740 ptr->s3 = s3; 747 741 748 742 ptr->meta_buf = xmlBufferCreateSize(SWISH_BUFFER_CHUNK_SIZE); 749 743 ptr->prop_buf = xmlBufferCreateSize(SWISH_BUFFER_CHUNK_SIZE); 750 751 ptr->config = config; 752 ptr->analyzer = analyzer; 753 744 754 745 ptr->tag = NULL; 755 746 ptr->wordlist = swish_init_wordlist(); 756 ptr->properties = swish_init_nb( config, (xmlChar*)SWISH_PROP);757 ptr->metanames = swish_init_nb( config, (xmlChar*)SWISH_META);747 ptr->properties = swish_init_nb(s3->config, (xmlChar*)SWISH_PROP); 748 ptr->metanames = swish_init_nb(s3->config, (xmlChar*)SWISH_META); 758 749 759 750 /* prime the stacks */ … … 1193 1184 int 1194 1185 swish_parse_fh( 1195 swish_Parser * parser, 1196 FILE * fh, 1197 void * stash 1186 swish_3 * s3, 1187 FILE * fh 1198 1188 ) 1199 1189 { … … 1220 1210 swish_mem_debug(); 1221 1211 1222 ln = swish_xmalloc(SWISH_MAXSTRLEN + 1);1223 head_buf = xmlBufferCreateSize((SWISH_MAX_HEADERS * SWISH_MAXSTRLEN) + SWISH_MAX_HEADERS);1212 ln = swish_xmalloc(SWISH_MAXSTRLEN + 1); 1213 head_buf = xmlBufferCreateSize((SWISH_MAX_HEADERS * SWISH_MAXSTRLEN) + SWISH_MAX_HEADERS); 1224 1214 1225 1215 swish_mem_debug(); … … 1254 1244 /* blank line indicates body */ 1255 1245 curTime = swish_time_elapsed(); 1256 parse_data = init_parse_data( parser->config, parser->analyzer, stash);1246 parse_data = init_parse_data(s3); 1257 1247 head = buf_to_head( (xmlChar*)xmlBufferContent(head_buf) ); 1258 1248 parse_data->docinfo = head_to_docinfo(head); 1259 swish_check_docinfo(parse_data->docinfo, parser->config);1249 swish_check_docinfo(parse_data->docinfo, s3->config); 1260 1250 1261 1251 if (SWISH_DEBUG > 9) … … 1284 1274 1285 1275 /* pass to callback function */ 1286 (* parser->handler)(parse_data);1276 (*s3->parser->handler)(parse_data); 1287 1277 1288 1278 if (SWISH_DEBUG > 9) … … 1373 1363 */ 1374 1364 int 1375 swish_parse_buffer( 1376 swish_Parser * parser, 1377 xmlChar * buf, 1378 void * stash 1379 ) 1365 swish_parse_buffer( swish_3 * s3, xmlChar * buf ) 1380 1366 { 1381 1367 … … 1391 1377 SWISH_DEBUG_MSG("number of headlines: %d", head->nlines); 1392 1378 1393 swish_ParseData *parse_data = init_parse_data( parser->config, parser->analyzer, stash);1379 swish_ParseData *parse_data = init_parse_data(s3); 1394 1380 parse_data->docinfo = head_to_docinfo(head); 1395 swish_check_docinfo(parse_data->docinfo, parser->config);1381 swish_check_docinfo(parse_data->docinfo, s3->config); 1396 1382 1397 1383 /* reposition buf pointer at start of body (just past head) */ … … 1402 1388 1403 1389 /* pass to callback function */ 1404 (* parser->handler)(parse_data);1390 (*s3->parser->handler)(parse_data); 1405 1391 1406 1392 if (SWISH_DEBUG > 1) … … 1432 1418 int 1433 1419 swish_parse_file( 1434 swish_Parser * parser, 1435 xmlChar * filename, 1436 void * stash 1420 swish_3 * s3, 1421 xmlChar * filename 1437 1422 ) 1438 1423 { … … 1441 1426 char *etime; 1442 1427 1443 swish_ParseData *parse_data = init_parse_data( parser->config, parser->analyzer, stash);1428 swish_ParseData *parse_data = init_parse_data(s3); 1444 1429 parse_data->docinfo = swish_init_docinfo(); 1445 1430 … … 1454 1439 1455 1440 /* pass to callback function */ 1456 (* parser->handler) (parse_data);1441 (*s3->parser->handler) (parse_data); 1457 1442 1458 1443 if (SWISH_DEBUG > 1) … … 1609 1594 1610 1595 out = NULL; 1611 enc = getenv("SWISH_ENCODING");1596 enc = (xmlChar*)getenv("SWISH_ENCODING"); 1612 1597 1613 1598 /* TODO better encoding detection. for now we assume unknown text files are latin1 */ … … 1754 1739 swish_WordList *tmplist; 1755 1740 1756 if (parse_data-> analyzer->tokenizer == NULL)1741 if (parse_data->s3->analyzer->tokenizer == NULL) 1757 1742 { 1758 1743 … … 1760 1745 1761 1746 tmplist = swish_tokenize( 1762 parse_data-> analyzer,1747 parse_data->s3->analyzer, 1763 1748 string, 1764 1749 parse_data->offset, … … 1775 1760 /* user-defined tokenizer */ 1776 1761 1777 tmplist = (*parse_data-> analyzer->tokenizer) (1778 parse_data-> analyzer,1762 tmplist = (*parse_data->s3->analyzer->tokenizer) ( 1763 parse_data->s3->analyzer, 1779 1764 string, 1780 1765 parse_data->offset, … … 1898 1883 swish_TagStack *s = parse_data->propstack; 1899 1884 int cleanwsp = 1; 1900 xmlHashTablePtr props = swish_subconfig_hash( parse_data-> config, (xmlChar*)SWISH_PROP );1885 xmlHashTablePtr props = swish_subconfig_hash( parse_data->s3->config, (xmlChar*)SWISH_PROP ); 1901 1886 1902 1887 /* should we strip whitespace from this particular property ? */ … … 1915 1900 for (s->temp = s->head; s->temp != NULL; s->temp = s->temp->next) 1916 1901 { 1917 if (xmlStrEqual(s->temp->name, "_")) /* top of the stack is just a placeholder */1902 if (xmlStrEqual(s->temp->name, (xmlChar*)"_")) /* top of the stack is just a placeholder */ 1918 1903 continue; 1919 1904 libswish3/trunk/src/libswish3/swish.c
r1927 r2010 26 26 int SWISH_DEBUG = 0; 27 27 28 void static swish_init(); 29 30 swish_3* 31 swish_init_swish3( void (*handler) (swish_ParseData *), void *stash ) 32 { 33 swish_3 *s3; 34 swish_init(); 35 s3 = swish_xmalloc(sizeof(swish_3)); 36 s3->ref_cnt++; 37 s3->config = swish_init_config(); 38 s3->analyzer = swish_init_analyzer(s3->config); 39 s3->parser = swish_init_parser(handler); 40 s3->stash = stash; 41 return s3; 42 } 43 28 44 void 45 swish_free_swish3(swish_3* s3) 46 { 47 swish_free_parser(s3->parser); 48 swish_free_analyzer(s3->analyzer); 49 swish_free_config(s3->config); 50 swish_xfree(s3); 51 swish_mem_debug(); 52 } 53 54 55 void static 29 56 swish_init() 30 57 { … … 44 71 45 72 } 46 47 void48 swish_cleanup()49 {50 swish_mem_debug();51 }52 libswish3/trunk/src/swish_lint.c
r1955 r2010 102 102 extern char *optarg; 103 103 extern int optind; 104 int option_index = 0;105 int files = 0;106 int overwrite = 0;104 int option_index; 105 int files; 106 int overwrite; 107 107 char *etime; 108 double startTime = swish_time_elapsed(); 109 108 double start_time; 110 109 xmlChar *config_file = NULL; 111 112 swish_init(); 113 114 swish_Config * config; 115 swish_Analyzer * analyzer; 116 swish_Parser * parser; 110 swish_3 *s3; 111 112 option_index = 0; 113 files = 0; 114 overwrite = 0; 115 start_time = swish_time_elapsed(); 116 s3 = swish_init_swish3( &handler, NULL ); 117 117 118 118 while ((ch = getopt_long(argc, argv, "c:d:f:h", longopts, &option_index)) != -1) … … 164 164 } 165 165 166 config = swish_init_config();167 168 166 if (config_file != NULL) 169 167 { 170 config = swish_add_config(config_file,config);168 s3->config = swish_add_config(config_file, s3->config); 171 169 } 172 170 … … 176 174 if (!i || i >= argc) 177 175 { 178 swish_free_ config(config);176 swish_free_swish3( s3 ); 179 177 usage(); 180 178 … … 183 181 if (SWISH_DEBUG == 20) 184 182 { 185 swish_debug_config(config); 186 } 187 188 analyzer = swish_init_analyzer( config ); 189 parser = swish_init_parser( config, analyzer, &handler, NULL ); 190 183 swish_debug_config(s3->config); 184 } 185 191 186 for (; i < argc; i++) 192 187 { … … 197 192 printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); 198 193 printf("parse_file for %s\n", argv[i]); 199 if (! swish_parse_file( parser, (unsigned char *) argv[i], NULL))194 if (! swish_parse_file(s3, (unsigned char *) argv[i])) 200 195 files++; 201 196 … … 205 200 206 201 printf("reading from stdin\n"); 207 files = swish_parse_fh( parser, NULL, NULL);202 files = swish_parse_fh(s3, NULL); 208 203 209 204 } … … 214 209 printf("total words: %d\n", twords); 215 210 216 etime = swish_print_time(swish_time_elapsed() - start Time);211 etime = swish_print_time(swish_time_elapsed() - start_time); 217 212 printf("%s total time\n\n", etime); 218 213 swish_xfree(etime); 219 214 220 swish_free_analyzer( analyzer ); 221 swish_free_config( config ); 222 swish_free_parser( parser ); 215 swish_free_swish3( s3 ); 223 216 224 217 if (config_file != NULL) 225 218 swish_xfree(config_file); 226 219 227 swish_cleanup();228 229 220 return (0); 230 221 } libswish3/trunk/src/swish_words.c
r1930 r2010 63 63 { 64 64 int i, ch; 65 int option_index = 0;65 int option_index; 66 66 extern char *optarg; 67 67 extern int optind; 68 68 xmlChar *string; 69 xmlChar *meta = (xmlChar*)SWISH_DEFAULT_METANAME; 69 swish_WordList *list; 70 xmlChar *meta; 71 swish_3 *s3; 70 72 71 string = NULL;72 73 s wish_WordList *list;73 meta = (xmlChar*)SWISH_DEFAULT_METANAME; 74 option_index = 0; 75 string = NULL; 74 76 75 77 while ((ch = getopt_long(argc, argv, "d:f:h", longopts, &option_index)) != -1) … … 115 117 } 116 118 117 swish_init(); /* call after we have set optional debug flag */ 118 119 swish_Config * config = swish_init_config(); 120 swish_Analyzer * analyzer = swish_init_analyzer( config ); 121 122 i = optind; 119 s3 = swish_init_swish3( NULL, NULL ); /* call after we have set optional debug flag */ 120 i = optind; 123 121 124 122 for (; i < argc; i++) 125 123 { 126 list = swish_tokenize( analyzer, (xmlChar *) argv[i], 0, 0, meta, meta );124 list = swish_tokenize( s3->analyzer, (xmlChar *) argv[i], 0, 0, meta, meta ); 127 125 printf("parsed: %s\n", argv[i]); 128 126 swish_debug_wordlist(list); … … 132 130 if (string != NULL) 133 131 { 134 list = swish_tokenize( analyzer, string, 0, 0, meta, meta );132 list = swish_tokenize( s3->analyzer, string, 0, 0, meta, meta ); 135 133 printf("parsed: %s\n", string); 136 134 swish_debug_wordlist(list); … … 139 137 } 140 138 141 swish_free_analyzer( analyzer ); 142 swish_free_config( config ); 139 swish_free_swish3( s3 ); 143 140 144 swish_cleanup();145 146 141 return (0); 147 142 }
