Changeset 1923
- Timestamp:
- 03/19/07 11:58:02 (1 year ago)
- Files:
-
- libswish3/trunk/CONTRIB (added)
- libswish3/trunk/README (modified) (1 diff)
- libswish3/trunk/bindings/perl/3.xs (modified) (6 diffs)
- libswish3/trunk/bindings/perl/t/02xml2_version.t (modified) (2 diffs)
- libswish3/trunk/bindings/perl/t/10tokenize.t (added)
- libswish3/trunk/bindings/perl/typemap (modified) (1 diff)
- libswish3/trunk/libswish3-config.in (modified) (1 diff)
- libswish3/trunk/libswish3.build (modified) (2 diffs)
- libswish3/trunk/src/libswish3/config.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/libswish3.h (modified) (3 diffs)
- libswish3/trunk/src/libswish3/mem.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/words.c (modified) (33 diffs)
- libswish3/trunk/src/swish_lint.c (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/README
r1913 r1923 29 29 # ReplaceRules 30 30 # SpiderDirectory 31 32 33 34 Getting Started 35 ----------------- 36 37 See the INSTALL doc. 38 39 But basically: 40 41 ./bootstrap (only necessary if you are doing development) 42 ./configure 43 make && make test 44 sudo make install 45 46 libswish3/trunk/bindings/perl/3.xs
r1921 r1923 217 217 MODULE = SWISH::3 PACKAGE = SWISH::3 218 218 219 PROTOTYPES: disable219 PROTOTYPES: enable 220 220 221 221 SV* … … 240 240 RETVAL 241 241 242 242 SV* 243 libswish3_version(self) 244 SV* self; 245 246 CODE: 247 RETVAL = newSVpvn( SWISH_LIB_VERSION, strlen(SWISH_LIB_VERSION) ); 248 249 OUTPUT: 250 RETVAL 251 252 243 253 # ********************************************************************************* 244 254 MODULE = SWISH::3 PACKAGE = SWISH::3::Constants … … 391 401 RETVAL 392 402 393 394 403 404 405 swish_WordList * 406 tokenize(self, str, ...) 407 SV* self; 408 SV* str; 409 410 PREINIT: 411 char * CLASS; 412 char * metaname = SWISH_DEFAULT_METANAME; 413 char * context = SWISH_DEFAULT_METANAME; 414 int maxwordlen = SWISH_MAX_WORD_LEN; 415 int minwordlen = SWISH_MIN_WORD_LEN; 416 int word_pos = 0; 417 int offset = 0; 418 419 CODE: 420 CLASS = _which_class("WordList"); 421 422 if ( items > 2 ) 423 { 424 metaname = SvPV(ST(2), PL_na); 425 426 if ( items > 3 ) 427 context = SvPV(ST(3), PL_na); 428 429 if ( items > 4 ) 430 maxwordlen = (int)SvIV(ST(4)); 431 432 if ( items > 5 ) 433 minwordlen = (int)SvIV(ST(5)); 434 435 if ( items > 6 ) 436 word_pos = (int)SvIV(ST(6)); 437 438 if ( items > 7 ) 439 offset = (int)SvIV(ST(7)); 440 441 } 442 443 RETVAL = swish_tokenize( 444 (xmlChar*)SvPV(str, PL_na), 445 (xmlChar*)metaname, 446 (xmlChar*)context, 447 maxwordlen, 448 minwordlen, 449 word_pos, 450 offset); 451 452 RETVAL->ref_cnt++; 453 454 OUTPUT: 455 RETVAL 456 457 395 458 396 459 # ******************************************************************************* … … 581 644 582 645 646 void 647 DESTROY(self) 648 swish_WordList * self 649 650 CODE: 651 self->ref_cnt--; 652 if (!self->ref_cnt) 653 { 654 swish_free_WordList(self); 655 } 656 657 583 658 584 659 # ******************************************************************************* … … 657 732 CODE: 658 733 CLASS = _which_class("WordList"); 734 735 # MUST increment refcnt 2x so that 
SWISH::3::Parser::WordList::DESTROY 736 # does not free it. 737 self->wordlist->ref_cnt += 2; 659 738 RETVAL = self->wordlist; 660 739 … … 774 853 { 775 854 swish_free_config(self); 776 swish_mem_debug();777 855 } 778 856 libswish3/trunk/bindings/perl/t/02xml2_version.t
r1920 r1923 1 use Test::More tests => 3; 1 use Test::More tests => 4; 2 2 3 3 use Devel::Peek; … … 11 11 diag($s); 12 12 13 ok( my $l = SWISH::3->libswish3_version, "libswish3 version"); 14 diag($l); 15 13 16 #Dump $v; libswish3/trunk/bindings/perl/typemap
r1914 r1923 1 1 TYPEMAP 2 const char * T_PV 3 xmlChar * T_PV 2 const char * T_PV 3 xmlChar * T_PV 4 4 swish_Config * O_OBJECT 5 xmlHashTablePtr O_OBJECT 5 xmlHashTablePtr O_OBJECT 6 6 swish_ParseData * O_OBJECT 7 xmlBufferPtr T_IV 7 xmlBufferPtr T_IV 8 8 swish_WordList * O_OBJECT 9 9 swish_DocInfo * O_OBJECT libswish3/trunk/libswish3-config.in
r1913 r1923 76 76 77 77 --libs) 78 echo -L${libdir} -lswishp@Z_LIBS@ @LIBS@ 78 echo -L${libdir} -lswish3 @Z_LIBS@ @LIBS@ 79 79 ;; 80 80 libswish3/trunk/libswish3.build
r1913 r1923 7 7 use File::Path; 8 8 9 my $tmp = '/tmp/swishparser.build'; 10 my $htdocs = '/opt/trac/swishparser/htdocs'; 9 my $tmp = '/tmp/swish3.build'; 10 my $htdocs = '/opt/trac/swish3/htdocs'; 11 11 my $doc = $htdocs . '/doc'; 12 12 my $dl = $htdocs . '/download'; … … 38 38 print P "<ul>"; 39 39 for my $pod ( 40 qw/ swishparser.7 swishp_lint.1 swishp_words.1 swishp_isw.1 libswishp.3 /) 40 qw/ swish_lint.1 swish_words.1 swish_isw.1 libswish3.3 /) 41 41 { 42 42 shell( libswish3/trunk/src/libswish3/config.c
r1921 r1923 160 160 161 161 swish_xfree(config); 162 163 swish_mem_debug(); 162 164 } 163 165 libswish3/trunk/src/libswish3/libswish3.h
r1921 r1923 29 29 30 30 31 #define SWISH_LIB_VERSION "0.1.0" 31 32 #define SWISH_VERSION "3.0.0" 32 33 #define SWISH_BUFFER_CHUNK_SIZE 10000 … … 41 42 42 43 #define SWISH_CONTRACTIONS 1 43 #define SWISH_NO_LOWER_UTF 1 44 /* chars > 127 will NOT be lowercased by default */ 45 44 46 45 #define SWISH_SPECIAL_ARG 1 … … 279 278 swish_Word *current; // for iterating 280 279 unsigned int nwords; 280 unsigned int ref_cnt; // for scripting languages 281 281 }; 282 282 libswish3/trunk/src/libswish3/mem.c
r1913 r1923 37 37 { 38 38 memcount = 0; 39 40 if (SWISH_DEBUG) 41 return; 42 39 43 /* init the global env vars, but don't override if already set */ 40 44 setenv("SWISH_DEBUG", "0", 0); libswish3/trunk/src/libswish3/words.c
r1922 r1923 30 30 #include "libswish3.h" 31 31 32 static int WORD_DEBUG;32 extern int SWISH_DEBUG; 33 33 static int strip_ascii_chars(xmlChar * word, int len); 34 34 static int strip_wide_chars(wchar_t * word, int len); … … 44 44 /**********************************************************************************************/ 45 45 46 /* we have our own set_debug here because we might be calling these tokenizing 47 functions without ever calling swish_init_config() 48 */ 49 46 50 static void set_debug() 47 51 { 52 if (SWISH_DEBUG) 53 return; 54 48 55 setenv("SWISH_DEBUG", "0", 0); 49 56 /* init the global env var, but don't override if already set */ 50 WORD_DEBUG = strtol(getenv("SWISH_DEBUG"), (char**)NULL, 10);57 SWISH_DEBUG = strtol(getenv("SWISH_DEBUG"), (char**)NULL, 10); 51 58 } 52 59 … … 58 65 { 59 66 swish_WordList *wl = (swish_WordList *) swish_xmalloc(sizeof(swish_WordList)); 60 wl->head = NULL;61 wl->tail = NULL;67 wl->head = NULL; 68 wl->tail = NULL; 62 69 wl->current = NULL; 63 wl->nwords = 0; 64 65 set_debug(); 66 70 wl->nwords = 0; 71 wl->ref_cnt = 0; 67 72 return wl; 68 73 } … … 73 78 swish_Word *t; 74 79 75 if ( WORD_DEBUG > 9)80 if (SWISH_DEBUG > 9) 76 81 swish_debug_msg("freeing swish_WordList"); 77 82 … … 80 85 while (list->current != NULL) 81 86 { 82 if ( WORD_DEBUG > 9)87 if (SWISH_DEBUG > 9) 83 88 swish_debug_msg("free metaname: %s", list->current->metaname); 84 89 85 90 swish_xfree(list->current->metaname); 86 91 87 if ( WORD_DEBUG > 9)92 if (SWISH_DEBUG > 9) 88 93 swish_debug_msg("free context: %s", list->current->context); 89 94 90 95 swish_xfree(list->current->context); 91 96 92 if ( WORD_DEBUG > 9)97 if (SWISH_DEBUG > 9) 93 98 swish_debug_msg("free word: %s", list->current->word); 94 99 95 100 swish_xfree(list->current->word); 96 101 97 if ( WORD_DEBUG > 9)102 if (SWISH_DEBUG > 9) 98 103 swish_debug_msg("free Word struct"); 99 104 … … 103 108 } 104 109 105 if ( WORD_DEBUG > 9)110 if (SWISH_DEBUG > 9) 106 111 swish_debug_msg("reset 
nwords"); 107 112 108 113 list->nwords = 0; 109 114 110 if ( WORD_DEBUG > 9)115 if (SWISH_DEBUG > 9) 111 116 swish_debug_msg("free list"); 112 117 … … 220 225 } 221 226 222 if( WORD_DEBUG > 5 )227 if( SWISH_DEBUG > 5 ) 223 228 swish_debug_msg(" %lc is %d bytes long", ch, len); 224 229 … … 239 244 ) 240 245 { 246 241 247 int byte_count = 0; 242 248 swish_WordList *list = swish_init_WordList(); … … 261 267 in_word = 0; 262 268 263 if ( WORD_DEBUG > 10)269 if (SWISH_DEBUG > 10) 264 270 swish_debug_msg("parsing string: '%ls' into words", wide); 265 271 … … 270 276 byte_count += bytes_in_char((wint_t)c); 271 277 272 if ( WORD_DEBUG > 10)278 if (SWISH_DEBUG > 10) 273 279 swish_debug_msg(" wchar: %lc lower: %lc int: %d %#x\n orig: %lc %ld %#lx (next is %lc)", 274 280 (wint_t) wide[i], … … 294 300 if (in_word) 295 301 { 296 if ( WORD_DEBUG > 10)302 if (SWISH_DEBUG > 10) 297 303 swish_debug_msg("found end of word: >%lc<", (wint_t)c); 298 304 … … 316 322 else 317 323 { 318 if ( WORD_DEBUG > 10)324 if (SWISH_DEBUG > 10) 319 325 swish_debug_msg("skipping word >%s< -- too short: %d", utf8_str, wl); 320 326 } … … 327 333 else 328 334 { 329 if ( WORD_DEBUG > 10)335 if (SWISH_DEBUG > 10) 330 336 swish_debug_msg("ignoring char >%lc<", (wint_t)c); 331 337 … … 340 346 { 341 347 342 if ( WORD_DEBUG > 10)348 if (SWISH_DEBUG > 10) 343 349 swish_debug_msg("adding to word: >%lc<", (wint_t)c); 344 350 … … 349 355 { 350 356 351 if ( WORD_DEBUG > 10)357 if (SWISH_DEBUG > 10) 352 358 swish_debug_msg("forcing end of word: >%lc<", (wint_t)c); 353 359 … … 371 377 else 372 378 { 373 if ( WORD_DEBUG > 10)379 if (SWISH_DEBUG > 10) 374 380 swish_debug_msg("skipping word >%ls< -- too short: %d", word, wl); 375 381 } … … 385 391 { 386 392 387 if ( WORD_DEBUG > 10)393 if (SWISH_DEBUG > 10) 388 394 swish_debug_msg("start a word with >%lc<", (wint_t)c); 389 395 … … 473 479 in_word = 0; 474 480 475 if ( WORD_DEBUG > 10)481 if (SWISH_DEBUG > 10) 476 482 swish_debug_msg("parsing string: '%s' into words", 
str); 477 483 … … 488 494 byte_count++; 489 495 490 if ( WORD_DEBUG > 10)496 if (SWISH_DEBUG > 10) 491 497 swish_debug_msg(" char: %c lower: %c int: %d %#x (next is %c)", 492 498 str[i], … … 509 515 if (in_word) 510 516 { 511 if ( WORD_DEBUG > 10)517 if (SWISH_DEBUG > 10) 512 518 swish_debug_msg("found end of word: >%c<", c); 513 519 … … 530 536 else 531 537 { 532 if ( WORD_DEBUG > 10)538 if (SWISH_DEBUG > 10) 533 539 swish_debug_msg("skipping word >%s< -- too short: %d", word, wl); 534 540 } … … 539 545 else 540 546 { 541 if ( WORD_DEBUG > 10)547 if (SWISH_DEBUG > 10) 542 548 swish_debug_msg("ignoring char >%c<", c); 543 549 … … 552 558 { 553 559 554 if ( WORD_DEBUG > 10)560 if (SWISH_DEBUG > 10) 555 561 swish_debug_msg("adding to word: >%c<", c); 556 562 … … 561 567 { 562 568 563 if ( WORD_DEBUG > 10)569 if (SWISH_DEBUG > 10) 564 570 swish_debug_msg("forcing end of word: >%c<", c); 565 571 … … 582 588 else 583 589 { 584 if ( WORD_DEBUG > 10)590 if (SWISH_DEBUG > 10) 585 591 swish_debug_msg("skipping word >%s< -- too short: %d", word, wl); 586 592 } … … 594 600 { 595 601 596 if ( WORD_DEBUG > 10)602 if (SWISH_DEBUG > 10) 597 603 swish_debug_msg("start a word with >%c<", c); 598 604 … … 631 637 ) 632 638 { 633 639 640 set_debug(); /* in case this is called without ever swish_init_config() */ 641 634 642 if (swish_is_ascii( str )) 635 643 { … … 670 678 end = 0; 671 679 672 if ( WORD_DEBUG > 8)680 if (SWISH_DEBUG > 8) 673 681 swish_debug_msg("Before: %ls", word); 674 682 … … 721 729 } 722 730 723 if ( WORD_DEBUG > 8)731 if (SWISH_DEBUG > 8) 724 732 swish_debug_msg("After: %ls (stripped %d start chars, %d end chars)", word, start, end); 725 733 … … 735 743 end = 0; 736 744 737 if ( WORD_DEBUG > 8)745 if (SWISH_DEBUG > 8) 738 746 swish_debug_msg("Before: %s", word); 739 747 … … 786 794 } 787 795 788 if ( WORD_DEBUG > 8)796 if (SWISH_DEBUG > 8) 789 797 swish_debug_msg("After: %s (stripped %d start chars, %d end chars)", word, start, end); 790 798 … … 811 819 size_t len 
= xmlStrlen(word); 812 820 813 if ( WORD_DEBUG > 4)821 if (SWISH_DEBUG > 4) 814 822 { 815 823 swish_debug_msg(" >>>>>>>>swish_Word<<<<<<<<: %s", word); … … 823 831 thisword->word = swish_xstrdup(word); 824 832 thisword->position = word_pos; 825 thisword->metaname = swish_xstrdup(metaname); 826 thisword->context = swish_xstrdup(context); 833 834 if (metaname != NULL) 835 thisword->metaname = swish_xstrdup(metaname); 836 else 837 thisword->metaname = swish_xstrdup((xmlChar*)SWISH_DEFAULT_METANAME); 838 839 if (context != NULL) 840 thisword->context = swish_xstrdup(context); 841 else 842 thisword->context = swish_xstrdup((xmlChar*)SWISH_DEFAULT_METANAME); 843 827 844 thisword->end_offset = offset - 1; 828 845 thisword->start_offset = offset - len; libswish3/trunk/src/swish_lint.c
r1913 r1923 59 59 void swish_version() 60 60 { 61 printf("libswishpversion %s\n", SWISH_VERSION); 61 printf("libswish3 version %s\n", SWISH_VERSION); 62 62 } 63 63
