Changeset 2153 for libswish3/trunk/src/libswish3
- Timestamp: 07/31/08 23:10:16 (4 months ago)
- Files:
- libswish3/trunk/src/libswish3/libswish3.h (modified) (3 diffs)
- libswish3/trunk/src/libswish3/parser.c (modified) (7 diffs)
- libswish3/trunk/src/libswish3/tokenizer.c (modified) (12 diffs)
- libswish3/trunk/src/libswish3/words.c (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/src/libswish3/libswish3.h
r2150 r2153 304 304 struct swish_Token 305 305 { 306 unsigned int pos; 306 unsigned int pos; // this token's position in document 307 307 swish_MetaName *meta; 308 308 xmlChar *value; … … 316 316 { 317 317 unsigned int n; 318 unsigned int pos; // track position in document 318 319 xmlBufferPtr buf; 319 320 swish_Token** tokens; … … 325 326 swish_TokenList *tl; 326 327 swish_Config *config; 327 unsigned int pos; 328 unsigned int pos; // position in iteration 328 329 int ref_cnt; 329 330 }; libswish3/trunk/src/libswish3/parser.c
r2150 r2153 328 328 if (xmlStrEqual(swishtag, (xmlChar *)"br") 329 329 || xmlStrEqual(swishtag, (xmlChar *)"img")) { 330 331 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 332 SWISH_DEBUG_MSG("found html tag '%s' ... bump_word = 1", swishtag); 330 333 parser_data->bump_word = 1; 331 334 } … … 342 345 * elements 343 346 */ 347 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 348 SWISH_DEBUG_MSG("found html !inline tag '%s' ... bump_word = 1", swishtag); 344 349 parser_data->bump_word = 1; 345 350 … … 347 352 else { 348 353 354 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 355 SWISH_DEBUG_MSG("found html inline tag '%s' ... bump_word = 0", swishtag); 349 356 parser_data->bump_word = 0; 350 357 … … 391 398 * do not match across metas 392 399 */ 400 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 401 SWISH_DEBUG_MSG("found html meta tag '%s' ... bump_word = 1", metaname); 393 402 parser_data->bump_word = 1; 394 403 open_tag(parser_data, metaname, NULL); … … 415 424 */ 416 425 426 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 427 SWISH_DEBUG_MSG("found xml tag '%s' ... bump_word = 1", swishtag); 417 428 parser_data->bump_word = 1; 418 429 … … 2065 2076 2066 2077 swish_WordList *tmplist; 2067 2078 2068 2079 if (parser_data->s3->analyzer->tokenlist) { 2069 2080 … … 2076 2087 parser_data->token_iterator->tl, 2077 2088 meta, context); 2078 2079 2089 return; 2080 2090 libswish3/trunk/src/libswish3/tokenizer.c
r2148 r2153 316 316 tl->buf = xmlBufferCreateSize((size_t) SWISH_BUFFER_CHUNK_SIZE); 317 317 tl->n = 0; 318 tl->pos = 0; 318 319 tl->ref_cnt = 0; 319 320 tl->tokens = swish_xmalloc(sizeof(swish_Token *) * SWISH_TOKEN_LIST_SIZE); … … 372 373 stoken->start_byte = xmlBufferLength(tl->buf); 373 374 stoken->len = token_len - 1; /* TODO do we even need NULL? */ 374 stoken->pos = tl->n + 1;375 stoken->pos = ++tl->pos; 375 376 stoken->meta = meta; 376 377 stoken->meta->ref_cnt++; … … 615 616 616 617 } 617 618 618 619 prev_pos = byte_pos; 619 620 … … 645 646 646 647 token = copy; /* restore to top of array so we do not leak */ 648 649 if (cp == SWISH_TOKENPOS_BUMPER[0]) { 650 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 651 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", tl->pos); 652 tl->pos++; 653 } 647 654 648 655 continue; … … 652 659 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 653 660 SWISH_DEBUG_MSG("ignoring chr '%s'", chr); 661 662 if (cp == SWISH_TOKENPOS_BUMPER[0]) { 663 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 664 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", tl->pos); 665 tl->pos++; 666 } 654 667 655 668 continue; … … 695 708 token = copy; /* restore to top of array */ 696 709 710 } 711 712 if (cp == SWISH_TOKENPOS_BUMPER[0]) { 713 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 714 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", tl->pos); 715 tl->pos++; 697 716 } 698 717 … … 718 737 } 719 738 739 if (cp == SWISH_TOKENPOS_BUMPER[0]) { 740 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 741 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", tl->pos); 742 tl->pos++; 743 } 744 720 745 continue; 721 746 … … 761 786 SWISH_DEBUG_MSG(" char: %c lower: %c int: %d %#x (next is %c)", buf[i], c, 762 787 (int)c, (unsigned int)c, nextc); 763 788 764 789 if (!ascii_word_table[(int)c]) { 765 790 … … 783 808 784 809 token = copy; 810 811 if (c == SWISH_TOKENPOS_BUMPER[0]) { 812 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 813 SWISH_DEBUG_MSG("found tokenpos bumper byte at 
pos %d", tl->pos); 814 tl->pos++; 815 } 816 785 817 786 818 continue; … … 790 822 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 791 823 SWISH_DEBUG_MSG("ignoring char '%c'", c); 824 825 if (c == SWISH_TOKENPOS_BUMPER[0]) { 826 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 827 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", tl->pos); 828 tl->pos++; 829 } 792 830 793 831 continue; … … 826 864 token = copy; 827 865 866 } 867 868 if (c == SWISH_TOKENPOS_BUMPER[0]) { 869 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 870 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", tl->pos); 871 tl->pos++; 828 872 } 829 873 … … 846 890 swish_add_token(tl, token, token_len, meta, context); 847 891 } 892 893 if (c == SWISH_TOKENPOS_BUMPER[0]) { 894 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 895 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", tl->pos); 896 tl->pos++; 897 } 898 848 899 continue; 849 900 libswish3/trunk/src/libswish3/words.c
r2148 r2153 327 327 328 328 swish_xfree(utf8_str); 329 330 if (c == SWISH_TOKENPOS_BUMPER[0]) { 331 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 332 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", word_pos); 333 word_pos++; 334 } 329 335 330 336 continue; … … 334 340 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 335 341 SWISH_DEBUG_MSG("ignoring char '%lc'", (wint_t) c); 342 343 if (c == SWISH_TOKENPOS_BUMPER[0]) { 344 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 345 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", word_pos); 346 word_pos++; 347 } 336 348 337 349 continue; … … 377 389 } 378 390 391 if (c == SWISH_TOKENPOS_BUMPER[0]) { 392 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 393 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", word_pos); 394 word_pos++; 395 } 396 379 397 continue; 380 398 … … 388 406 in_word = 1; 389 407 word[w++] = c; 408 409 if (c == SWISH_TOKENPOS_BUMPER[0]) { 410 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 411 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", word_pos); 412 word_pos++; 413 } 414 390 415 continue; 391 416 … … 514 539 } 515 540 541 if (c == SWISH_TOKENPOS_BUMPER[0]) { 542 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 543 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", word_pos); 544 word_pos++; 545 } 546 516 547 continue; 517 548 … … 520 551 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 521 552 SWISH_DEBUG_MSG("ignoring char '%c'", c); 522 553 554 if (c == SWISH_TOKENPOS_BUMPER[0]) { 555 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 556 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", word_pos); 557 word_pos++; 558 } 559 523 560 continue; 524 561 } … … 560 597 } 561 598 599 if (c == SWISH_TOKENPOS_BUMPER[0]) { 600 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 601 SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", word_pos); 602 word_pos++; 603 } 604 562 605 continue; 563 606 … … 572 615 in_word = 1; 573 616 word[w++] = c; 617 618 if (c == SWISH_TOKENPOS_BUMPER[0]) { 619 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 620 
SWISH_DEBUG_MSG("found tokenpos bumper byte at pos %d", word_pos); 621 word_pos++; 622 } 623 574 624 continue; 575 625
