Changeset 1927
- Timestamp:
- 04/20/07 17:54:55 (1 year ago)
- Files:
-
- libswish3/trunk/bindings/perl/3.xs (modified) (28 diffs)
- libswish3/trunk/bindings/perl/lib/SWISH/3.pm (modified) (1 diff)
- libswish3/trunk/bindings/perl/lib/SWISH/3/Analyzer.pm (added)
- libswish3/trunk/bindings/perl/lib/SWISH/3/Parser.pm (modified) (2 diffs)
- libswish3/trunk/bindings/perl/t/01slurp.t (modified) (1 diff)
- libswish3/trunk/bindings/perl/t/03parse_file.t (modified) (1 diff)
- libswish3/trunk/bindings/perl/t/05latin1.t (modified) (3 diffs)
- libswish3/trunk/bindings/perl/t/10tokenize.t (modified) (2 diffs)
- libswish3/trunk/bindings/perl/typemap (modified) (1 diff)
- libswish3/trunk/doc/libswish3.3.pod (modified) (10 diffs)
- libswish3/trunk/perl/docmaker.pl (modified) (1 diff)
- libswish3/trunk/src/libswish3/Makefile.am (modified) (1 diff)
- libswish3/trunk/src/libswish3/analyzer.c (added)
- libswish3/trunk/src/libswish3/config.c (modified) (13 diffs)
- libswish3/trunk/src/libswish3/libswish3.h (modified) (10 diffs)
- libswish3/trunk/src/libswish3/mem.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/parser.c (modified) (23 diffs)
- libswish3/trunk/src/libswish3/string.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/swish.c (modified) (3 diffs)
- libswish3/trunk/src/libswish3/words.c (modified) (37 diffs)
- libswish3/trunk/src/swish_lint.c (modified) (6 diffs)
- libswish3/trunk/src/swish_words.c (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/bindings/perl/3.xs
r1925 r1927 33 33 #include <libxml/xmlIO.h> 34 34 35 #include <stdarg.h> 35 36 #include <libswish3.h> 37 38 /* global debug var -- set with swish_init() */ 39 extern int SWISH_DEBUG; 40 41 42 /* some nice XS macros from KS - thanks Marvin! */ 43 #define START_SET_OR_GET_SWITCH \ 44 SV *RETVAL = &PL_sv_undef; \ 45 /* if called as a setter, make sure the extra arg is there */ \ 46 if (ix % 2 == 1) { \ 47 if (items != 2) \ 48 croak("usage: $object->set_xxxxxx($val)"); \ 49 } \ 50 else { \ 51 if (items != 1) \ 52 croak("usage: $object->get_xxxxx()"); \ 53 } \ 54 switch (ix) { 55 56 #define END_SET_OR_GET_SWITCH \ 57 default: croak("Internal error. ix: %d", ix); \ 58 break; /* probably unreachable */ \ 59 } \ 60 if (ix % 2 == 0) { \ 61 XPUSHs( sv_2mortal(RETVAL) ); \ 62 XSRETURN(1); \ 63 } \ 64 else { \ 65 XSRETURN(0); \ 66 } 67 36 68 37 69 … … 42 74 /* private package vars */ 43 75 44 static char * callback_method = "handler"; 76 #define DEFAULT_BASE_CLASS "SWISH::3::Parser" 77 #define CONFIG_CLASS "SWISH::3::Config" 78 #define ANALYZER_CLASS "SWISH::3::Analyzer" 79 45 80 static HV * SubClasses = (HV*)NULL; 46 81 static int nClasses = 5; /* match Classes[] ?? */ … … 58 93 /* private functions */ 59 94 60 static void _remember_handler(SV* handler)95 static void sp_remember_handler(SV* handler) 61 96 { 62 97 dTHX; … … 69 104 } 70 105 71 static void _make_subclasses( char * class )106 static void sp_make_subclasses( char * class ) 72 107 { 73 108 /* form() is the Perl API call that will "join" our strings */ … … 97 132 } 98 133 99 static SV * _make_object( char* CLASS, IV data ) 134 /* make a Perl blessed object from a C pointer */ 135 static SV * sp_ptr_to_object( char* CLASS, IV data ) 100 136 { 101 137 dTHX; … … 105 141 } 106 142 107 static void _example_of_call_method( char* class, IV data ) 108 { 109 dTHX; 110 dSP; /* !!! do NOT use dXSARGS -- for some reason causes panic memory wrap */ 111 SV*o = _make_object(class,data); 112 113 PUSHMARK(SP); 114 XPUSHs(o); 115 PUTBACK; 116 117 call_method(callback_method, G_DISCARD); 118 } 119 120 121 static char * _which_class( char * c ) 122 { 123 dTHX; 143 static char * sp_get_objects_class( SV* object ) 144 { 145 dTHX; 146 char * class = sv_reftype(SvRV(object), 1); 147 //warn("object belongs to %s\n", class); 148 return class; 149 } 150 151 static HV * sp_is_hash_object( SV* object ) 152 { 153 dTHX; 154 HV * hash = NULL; 155 char * class = sp_get_objects_class( object ); 156 if (SvROK(object) && SvTYPE(SvRV(object))==SVt_PVHV) 157 hash = (HV*)SvRV(object); 158 else if (SvROK(object) && SvTYPE(SvRV(object))==SVt_PVMG) 159 croak("%s is a magic reference not a hash reference", class); 160 else 161 croak("%s is reference but not a hash reference", class); 162 163 return hash; 164 } 165 166 static void sp_describe_object( SV* object ) 167 { 168 warn("describing object\n"); 169 char * str = "foo"; //SvPV( object, PL_na ); 170 if (SvROK(object)) 171 { 172 if (SvTYPE(SvRV(object))==SVt_PVHV) 173 warn("%s is a magic blessed reference\n", str); 174 else if (SvTYPE(SvRV(object))==SVt_PVMG) 175 warn("%s is a magic reference", str); 176 else if (SvTYPE(SvRV(object))==SVt_IV) 177 warn("%s is a IV reference (pointer)", str); 178 else 179 warn("%s is a reference of some kind", str); 180 } 181 else 182 { 183 warn("%s is not a reference", str); 184 if (sv_isobject(object)) 185 warn("however, %s is an object", str); 186 187 188 } 189 } 190 191 /* return the C pointer from a Perl blessed O_OBJECT */ 192 static IV sp_ptr_from_object( SV* object ) 193 { 194 dTHX; 195 return SvIV((SV*)SvRV( object )); 196 } 197 198 /* lookup the class name from the global hash */ 199 static char * sp_which_class( char * c ) 200 { 201 dTHX; 202 if (SubClasses == (HV*)NULL) 203 sp_make_subclasses(DEFAULT_BASE_CLASS); 204 124 205 SV** sv = hv_fetch( SubClasses, c, strlen(c), 0 ); 125 206 if ( !sv ) … … 129 210 } 130 211 131 static SV * _get_object_key( SV* object, char * name ) 132 { 133 dTHX; 134 char * class = sv_reftype(SvRV(object), 1); 135 HV* hash; 136 //printf("looking for %s in %s\n", name, class); 137 138 if (SvROK(object) && SvTYPE(SvRV(object))==SVt_PVHV) 139 hash = (HV*)SvRV(object); 140 else if (SvROK(object) && SvTYPE(SvRV(object))==SVt_PVMG) 141 croak("%s is a reference but is a magic reference", class); 142 else 143 croak("%s is a reference but is not a hash reference", class); 144 145 212 /* fetch a hash value from an object (i.e. a generic accessor) */ 213 static SV * sp_get_object_key( SV* object, char * name ) 214 { 215 dTHX; 216 char * class = sp_get_objects_class( object ); 217 //warn("looking for %s in %s\n", name, class); 218 HV* hash = sp_is_hash_object( object ); 146 219 SV** sv = hv_fetch(hash, name, strlen(name), 0); 147 220 … … 187 260 188 261 189 void s wish_perl_test_handler(swish_ParseData * parse_data)262 void sp_test_handler( swish_ParseData * parse_data ) 190 263 { 191 264 warn("handler called!\n"); … … 196 269 } 197 270 198 void s wish_perl_handler( swish_ParseData* parse_data )271 void sp_handler( swish_ParseData* parse_data ) 199 272 { 200 273 dTHX; 201 274 dSP; 202 275 203 char * class = _which_class("Data"); 204 SV * obj = _make_object(class, (IV)parse_data); 205 206 276 char * class = sp_which_class("Data"); 277 SV * obj = sp_ptr_to_object(class, (IV)parse_data); 278 207 279 PUSHMARK(SP); 208 280 XPUSHs(obj); … … 212 284 } 213 285 214 /* 286 287 /* this regex wizardry cribbed from KS - thanks Marvin! */ 215 288 swish_WordList * 216 swish_perl_re_tokenizer(xmlChar * string, 217 xmlChar * metaname, 218 xmlChar * context, 219 int maxwordlen, 220 int minwordlen, 221 int word_pos, 222 int offset) 223 { 224 225 226 227 228 } 229 */ 230 231 232 /*************************************************************************************/ 289 sp_tokenize(swish_Analyzer * analyzer, xmlChar * str, ...) 290 { 291 unsigned int wpos, offset, num_code_points; 292 xmlChar *meta, *ctxt; 293 SV *token_re; 294 swish_WordList *list; 295 va_list args; 296 va_start(args, str); 297 wpos = va_arg(args, unsigned int); 298 offset = va_arg(args, unsigned int); 299 meta = va_arg(args, xmlChar *); 300 ctxt = va_arg(args, xmlChar *); 301 va_end(args); 302 303 MAGIC *mg = NULL; 304 REGEXP *rx = NULL; 305 SV *wrapper = sv_newmortal(); 306 xmlChar *str_start = str; 307 int str_len = strlen((char*)str); 308 xmlChar *str_end = str_start + str_len; 309 310 token_re = analyzer->regex; /* TODO is this right ?? */ 311 312 /* extract regexp struct from qr// entity */ 313 if (SvROK(token_re)) { 314 SV *sv = SvRV(token_re); 315 if (SvMAGICAL(sv)) 316 mg = mg_find(sv, PERL_MAGIC_qr); 317 } 318 if (!mg) 319 croak("not a qr// entity"); 320 rx = (REGEXP*)mg->mg_obj; 321 322 /* fake up an SV wrapper to feed to the regex engine */ 323 sv_upgrade(wrapper, SVt_PV); 324 SvREADONLY_on(wrapper); 325 SvLEN(wrapper) = 0; 326 SvUTF8_on(wrapper); /* do UTF8 matching -- TODO conditional on swish_is_ascii() ?? */ 327 328 /* wrap the string in an SV to please the regex engine */ 329 SvPVX(wrapper) = str_start; 330 SvCUR_set(wrapper, str_len); 331 SvPOK_on(wrapper); 332 333 list = swish_init_WordList(); 334 num_code_points = 0; 335 336 while ( pregexec(rx, str, str_end, str, 1, wrapper, 1) ) 337 { 338 xmlChar * start_ptr = str + rx->startp[0]; 339 xmlChar * end_ptr = str + rx->endp[0]; 340 int start, end, tok_len; 341 xmlChar * token; 342 343 /* get start and end offsets in Unicode code points */ 344 for( ; str < start_ptr; num_code_points++) 345 { 346 str += swish_utf8_chr_len(str); 347 if (str > str_end) 348 croak("scanned past end of '%s'", str_start); 349 } 350 351 start = num_code_points; 352 353 for( ; str < end_ptr; num_code_points++) 354 { 355 str += swish_utf8_chr_len(str); 356 if (str > str_end) 357 croak("scanned past end of '%s'", str_start); 358 } 359 360 end = num_code_points; 361 362 tok_len = end_ptr - start_ptr; /* bytes */ 363 364 /* TODO add to list based on max, min, etc */ 365 366 /* equivalent to swish_xstrdup() 367 -- TODO better way, since add_word() will also xstrdup */ 368 369 if (SWISH_DEBUG) 370 { 371 token = SvPV( newSVpvn(start_ptr, tok_len), PL_na ); 372 warn("%s (%d %d)\n", token, start + 1, end); 373 } 374 375 } 376 377 return list; 378 } 379 380 381 /******************************************************************************* 382 383 end our native C helpers, start the XS 384 385 ********************************************************************************/ 386 387 233 388 MODULE = SWISH::3 PACKAGE = SWISH::3 234 389 … … 308 463 309 464 PROTOTYPES: enable 310 465 466 311 467 void 312 _make_subclasses (self) 313 SV * self 314 315 PREINIT: 316 char* class; 317 468 _init_swish(class) 469 char * class 470 471 CODE: 472 swish_init(); 473 474 475 swish_Parser * 476 _init(CLASS, config, analyzer, handler) 477 char * CLASS 478 SV * config 479 SV * analyzer 480 SV * handler 481 318 482 CODE: 319 class = sv_reftype(SvRV(self), 1); 320 //printf("parent class is %s\n", class); 321 _make_subclasses(class); 322 323 483 sp_make_subclasses(CLASS); 484 sp_remember_handler(handler); 485 RETVAL = swish_init_parser( 486 (swish_Config*)sp_ptr_from_object(config), 487 (swish_Analyzer*)sp_ptr_from_object(analyzer), 488 &sp_handler, 489 NULL); 490 491 RETVAL->config->ref_cnt++; 492 RETVAL->analyzer->ref_cnt++; 493 RETVAL->ref_cnt++; 494 495 496 OUTPUT: 497 RETVAL 498 499 500 324 501 void 325 _cleanup(self) 326 SV* self; 327 328 CODE: 329 /* TODO */ 502 DESTROY(self) 503 swish_Parser * self 504 505 CODE: 506 //warn("DESTROYing parser"); 507 self->config->ref_cnt--; 508 self->analyzer->ref_cnt--; 509 self->ref_cnt--; 510 if (self->ref_cnt < 1) 511 { 512 # check too for our config and analyzer 513 # and free them if necessary 514 # this is necessary because the Perl 515 # objects that init'd them may have already 516 # been destroyed. 517 //warn("config ref_cnt = %d", self->config->ref_cnt); 518 //warn("analyzer ref_cnt = %d", self->analyzer->ref_cnt); 519 if (self->config->ref_cnt < 1) 520 { 521 //warn("freeing config"); 522 swish_free_config(self->config); 523 } 524 if (self->analyzer->ref_cnt < 1) 525 { 526 //warn("freeing analyzer"); 527 swish_free_analyzer(self->analyzer); 528 } 529 //warn("freeing parser"); 530 swish_free_parser(self); 531 swish_cleanup(); 532 } 330 533 331 534 … … 333 536 SV* 334 537 slurp_file(self, filename) 335 SV* self; 336 char* filename; 337 338 CODE: 339 RETVAL = newSVpv( (const char*)swish_slurp_file((xmlChar*)filename), 0 ); 340 341 OUTPUT: 342 RETVAL 343 344 345 void 346 _init_parser(self) 347 SV* self; 348 349 CODE: 350 swish_init_parser(); 351 _remember_handler(_get_object_key(self,callback_method)); 352 353 354 void 355 _free(self) 356 SV* self; 357 358 CODE: 359 swish_free_parser(); 360 361 # 362 # TODO: pass our own _word_tokenizer() callback so we can use Perl regexp 363 # 538 SV * self; 539 char * filename; 540 541 PREINIT: 542 xmlChar * buf; 543 544 CODE: 545 buf = swish_slurp_file((xmlChar*)filename); 546 RETVAL = newSVpv( (const char*)buf, 0 ); 547 swish_xfree(buf); 548 549 OUTPUT: 550 RETVAL 551 364 552 365 553 … … 371 559 PREINIT: 372 560 char * file; 373 SV * config;374 561 375 562 CODE: 376 563 file = SvPV(filename, PL_na); 377 config = _get_object_key(self,"config");378 564 379 565 # need to swap return values to make it Perlish 380 RETVAL = swish_parse_file((xmlChar*)file, 381 (swish_Config*)SvIV((SV*)SvRV( config )), 382 &swish_perl_handler, 383 NULL, 566 RETVAL = swish_parse_file( (swish_Parser*)sp_ptr_from_object(self), 567 (xmlChar*)file, 384 568 (void*)SvREFCNT_inc( self ) 385 569 ) 386 570 ? 0 387 571 : 1; 572 573 SvREFCNT_dec( self ); 388 574 389 575 OUTPUT: … … 397 583 398 584 PREINIT: 399 SV* config; 400 char* buf; 401 402 CODE: 403 config = _get_object_key(self,"config"); 404 buf = SvPV(buffer, PL_na); 585 char * buf; 586 587 CODE: 588 buf = SvPV(buffer, PL_na); 405 589 406 RETVAL = swish_parse_buffer((xmlChar*)buf, 407 (swish_Config*)SvIV((SV*)SvRV( config )), 408 &swish_perl_handler, 409 NULL, 590 RETVAL = swish_parse_buffer((swish_Parser*)sp_ptr_from_object(self), 591 (xmlChar*)buf, 410 592 (void*)SvREFCNT_inc( self ) 411 593 ) … … 417 599 RETVAL 418 600 419 420 421 swish_WordList * 422 tokenize(self, str, ...) 423 SV* self; 424 SV* str; 425 426 PREINIT: 427 char * CLASS; 428 char * metaname = SWISH_DEFAULT_METANAME; 429 char * context = SWISH_DEFAULT_METANAME; 430 int maxwordlen = SWISH_MAX_WORD_LEN; 431 int minwordlen = SWISH_MIN_WORD_LEN; 432 int word_pos = 0; 433 int offset = 0; 434 435 CODE: 436 CLASS = _which_class("WordList"); 437 438 if ( items > 2 ) 439 { 440 metaname = SvPV(ST(2), PL_na); 441 442 if ( items > 3 ) 443 context = SvPV(ST(3), PL_na); 444 445 if ( items > 4 ) 446 maxwordlen = (int)SvIV(ST(4)); 447 448 if ( items > 5 ) 449 minwordlen = (int)SvIV(ST(5)); 450 451 if ( items > 6 ) 452 word_pos = (int)SvIV(ST(6)); 453 454 if ( items > 7 ) 455 offset = (int)SvIV(ST(7)); 456 457 } 458 459 RETVAL = swish_tokenize( 460 (xmlChar*)SvPV(str, PL_na), 461 (xmlChar*)metaname, 462 (xmlChar*)context, 463 maxwordlen, 464 minwordlen, 465 word_pos, 466 offset); 467 468 RETVAL->ref_cnt++; 469 470 OUTPUT: 471 RETVAL 472 601 602 # parser accessor/mutators 603 void 604 _set_or_get(self, ...) 605 swish_Parser * self; 606 ALIAS: 607 set_config = 1 608 get_config = 2 609 set_analyzer = 3 610 get_analyzer = 4 611 set_handler = 5 612 get_handler = 6 613 set_stash = 7 614 get_stash = 8 615 PPCODE: 616 { 617 START_SET_OR_GET_SWITCH 618 619 case 1: self->config = (swish_Config*)sp_ptr_from_object(ST(1)); 620 break; 621 622 case 2: RETVAL = sp_ptr_to_object(CONFIG_CLASS, (IV)self->config); 623 self->config->ref_cnt++; 624 break; 625 626 case 3: self->analyzer = (swish_Analyzer*)sp_ptr_from_object(ST(1)); 627 break; 628 629 case 4: RETVAL = sp_ptr_to_object(ANALYZER_CLASS, (IV)self->analyzer); 630 self->analyzer->ref_cnt++; 631 break; 632 633 case 5: sp_remember_handler(ST(1)); 634 break; 635 636 case 6: RETVAL = callback_handler; 637 break; 638 639 case 7: self->stash = (void*)SvREFCNT_inc( ST(1) ); 640 break; 641 642 case 8: RETVAL = (SV*)self->stash; 643 break; 644 645 END_SET_OR_GET_SWITCH 646 } 473 647 474 648 … … 477 651 MODULE = SWISH::3 PACKAGE = SWISH::3::Parser::Word 478 652 479 PROTOTYPES: disable653 PROTOTYPES: enable 480 654 481 655 SV * … … 541 715 MODULE = SWISH::3 PACKAGE = SWISH::3::Parser::Doc 542 716 543 PROTOTYPES: disable717 PROTOTYPES: enable 544 718 545 719 SV* … … 626 800 MODULE = SWISH::3 PACKAGE = SWISH::3::Parser::Property 627 801 628 PROTOTYPES: disable802 PROTOTYPES: enable 629 803 630 804 … … 634 808 MODULE = SWISH::3 PACKAGE = SWISH::3::Parser::WordList 635 809 636 PROTOTYPES: disable810 PROTOTYPES: enable 637 811 638 812 swish_Word * … … 644 818 645 819 CODE: 646 CLASS = _which_class("Word");820 CLASS = sp_which_class("Word"); 647 821 if (self->current == NULL) 648 822 { … … 666 840 CODE: 667 841 self->ref_cnt--; 668 if ( !self->ref_cnt)842 if (self->ref_cnt < 1) 669 843 { 670 844 swish_free_WordList(self); … … 677 851 MODULE = SWISH::3 PACKAGE = SWISH::3::Parser::Data 678 852 679 PROTOTYPES: disable853 PROTOTYPES: enable 680 854 681 855 SV* 682 856 parser(self) 683 857 swish_ParseData * self 684 685 PREINIT: 686 SV* parser; 687 858 688 859 CODE: 689 860 RETVAL = self->user_data; … … 698 869 699 870 PREINIT: 700 char* CLASS; 701 702 CODE: 703 CLASS = "SWISH::3::Config"; 871 char* CLASS = "SWISH::3::Config"; 872 873 CODE: 704 874 RETVAL = self->config; 705 875 RETVAL->ref_cnt++; … … 733 903 734 904 CODE: 735 CLASS = _which_class("Doc");905 CLASS = sp_which_class("Doc"); 736 906 RETVAL = self->docinfo; 737 907 … … 747 917 748 918 CODE: 749 CLASS = _which_class("WordList");919 CLASS = sp_which_class("WordList"); 750 920 751 921 # MUST increment refcnt 2x so that SWISH::3::Parser::WordList::DESTROY … … 777 947 MODULE = SWISH::3 PACKAGE = SWISH::3::Config 778 948 779 PROTOTYPES: disable949 PROTOTYPES: enable 780 950 781 951 AV* … … 827 997 CODE: 828 998 RETVAL = swish_init_config(); 829 # RETVAL->ref_cnt++; 999 RETVAL->ref_cnt++; 1000 830 1001 831 1002 OUTPUT: … … 865 1036 866 1037 CODE: 1038 //warn("DESTROYing swish_Config object"); 867 1039 self->ref_cnt--; 868 if ( !self->ref_cnt)1040 if (self->ref_cnt < 1) 869 1041 { 1042 //warn("freeing swish_Config struct"); 870 1043 swish_free_config(self); 871 1044 } 872 1045 1046 # ******************************************************************************* 1047 1048 MODULE = SWISH::3 PACKAGE = SWISH::3::Analyzer 1049 1050 PROTOTYPES: enable 1051 1052 swish_Analyzer * 1053 init(CLASS, config, regex) 1054 char * CLASS; 1055 SV * config; 1056 SV * regex; 1057 1058 CODE: 1059 RETVAL = swish_init_analyzer( (swish_Config*)sp_ptr_from_object(config) ); 1060 RETVAL->ref_cnt++; 1061 RETVAL->regex = (void*)SvREFCNT_inc( regex ); 1062 RETVAL->tokenizer = &sp_tokenize; 1063 1064 OUTPUT: 1065 RETVAL 1066 1067 1068 1069 void 1070 DESTROY(self) 1071 swish_Analyzer * self 1072 1073 CODE: 1074 //warn("DESTROYing analyzer"); 1075 self->ref_cnt--; 1076 if (self->ref_cnt < 1) 1077 { 1078 //warn("freeing analyzer"); 1079 swish_free_analyzer(self); 1080 } 1081 1082 1083 # tokenize() from Perl space uses same C func as tokenizer callback 1084 swish_WordList * 1085 tokenize(self, str, ...) 1086 SV * self; 1087 SV * str; 1088 1089 PREINIT: 1090 char * CLASS; 1091 xmlChar * metaname = SWISH_DEFAULT_METANAME; 1092 xmlChar * context = SWISH_DEFAULT_METANAME; 1093 unsigned int word_pos = 0; 1094 unsigned int offset = 0; 1095 xmlChar * buf = SvPV(str, PL_na); 1096 1097 CODE: 1098 CLASS = sp_which_class("WordList"); 1099 1100 if (!SvUTF8(str)) 1101 { 1102 if (swish_is_ascii(buf)) 1103 SvUTF8_on(str); /* flags original SV ?? */ 1104 else 1105 croak("%s is not flagged as a UTF-8 string and is not ASCII", buf); 1106 } 1107 1108 if ( items > 2 ) 1109 { 1110 word_pos = (int)SvIV(ST(2)); 1111 1112 if ( items > 3 ) 1113 offset = (int)SvIV(ST(3)); 1114 1115 if ( items > 4 ) 1116 metaname = SvPV(ST(4), PL_na); 1117 1118 if ( items > 5 ) 1119 context = SvPV(ST(5), PL_na); 1120 1121 } 1122 1123 RETVAL = sp_tokenize( 1124 (swish_Analyzer*)sp_ptr_from_object(self), 1125 buf, 1126 word_pos, 1127 offset, 1128 metaname, 1129 context 1130 ); 1131 1132 RETVAL->ref_cnt++; 1133 1134 /* TODO do we need to worry about free()ing metaname and context ?? */ 1135 1136 OUTPUT: 1137 RETVAL 1138 1139 1140 # TODO: get/set methods, including way to set tokenizer func ref 1141 libswish3/trunk/bindings/perl/lib/SWISH/3.pm
r1921 r1927 10 10 require XSLoader; 11 11 XSLoader::load('SWISH::3', $VERSION); 12 13 $ENV{SWISH3} = 1; # flag let's SWISH::Prog et al know we are version314 # TODO doesn't libswish3 do this already?15 12 16 13 1; libswish3/trunk/bindings/perl/lib/SWISH/3/Parser.pm
r1921 r1927 3 3 use strict; 4 4 use warnings; 5 use base qw( SWISH::3::Object );5 use Carp; 6 6 use SWISH::3; # in case this class gets 'use'd directly 7 7 use SWISH::3::Parser::Doc; … … 11 11 use SWISH::3::Parser::WordList; 12 12 use SWISH::3::Config; 13 use SWISH::3::Analyzer; 13 14 14 15 use Devel::Peek; 16 use Data::Dump qw( pp ); 15 17 16 18 our $VERSION = '0.01'; 17 19 18 __PACKAGE__->mk_accessors(qw( config handler indexer )); 19 20 sub init 20 sub new 21 21 { 22 my $self = shift; 23 $self->_make_subclasses; 24 $self->_init_config; 25 $self->_init_handler; 26 $self->_init_parser; 22 my $proto = shift; 23 my $class = ref($proto) || $proto; 24 $class->_init_swish; 25 my %args = @_; 26 my $config = SWISH::3::Config->new; 27 if ($args{config}) 28 { 29 $config->add($args{config}); 30 } 31 $args{analyzer} ||= SWISH::3::Analyzer->new(config => $config); 32 unless ($args{handler}) 33 { 34 carp( "WARNING: using default SWISH::3::Parser::Data handler -- " 35 . "that's likely not what you want"); 36 $args{handler} = \&SWISH::3::Parser::Doc::handler; 37 } 38 my $self = $class->_init($config, $args{analyzer}, $args{handler}); 27 39 return $self; 28 }29 30 sub DESTROY31 {32 my $self = shift;33 34 #carp "about to DESTROY 3 object";35 #Dump $self;36 37 $self->_free;38 $self->_cleanup;39 }40 41 sub _init_config42 {43 my $self = shift;44 my $conf = SWISH::3::Config->new;45 46 if ($self->config)47 {48 $conf->add($self->config);49 }50 51 $self->config($conf);52 }53 54 sub _init_handler55 {56 my $self = shift;57 58 unless (exists($self->{handler}) && ref($self->{handler}) eq 'CODE')59 {60 Carp::carp "WARNING: using default SWISH::3::Parser::Data handler -- "61 . "that's likely not what you want";62 $self->handler(\&SWISH::3::Parser::Doc::handler);63 }64 40 } 65 41 libswish3/trunk/bindings/perl/t/01slurp.t
r1921 r1927 3 3 use Devel::Peek; 4 4 5 BEGIN { use_ok('SWISH::3::Parser') };5 use_ok('SWISH::3::Parser'); 6 6 7 ok( my $parser = SWISH::3::Parser->new,"new object");7 ok(my $parser = SWISH::3::Parser->new(handler => sub { }), "new object"); 8 8 9 ok( my $buf = $parser->slurp_file("t/test.html"), "slurp file"); 9 ok(my $buf = $parser->slurp_file("t/test.html"), "slurp file"); 10 10 11 #diag($buf); 11 12 12 #Dump $ buf;13 #Dump $parser; libswish3/trunk/bindings/perl/t/03parse_file.t
r1921 r1927 10 10 } 11 11 12 ok(my $parser = SWISH::3::Parser->new(handler => sub { }), 13 "new parser"); 12 ok(my $parser = SWISH::3::Parser->new(handler => sub { }), "new parser"); 14 13 15 14 #monitor('parser' => \$parser); libswish3/trunk/bindings/perl/t/05latin1.t
r1921 r1927 2 2 use Carp; 3 3 use Devel::Peek; 4 4 5 #use Devel::Monitor qw(:all); 5 6 … … 9 10 } 10 11 11 ok(my $parser = SWISH::3::Parser->new , "new parser");12 ok(my $parser = SWISH::3::Parser->new(handler => sub { }), "new parser"); 12 13 13 14 #monitor('parser' => \$parser); … … 21 22 { 22 23 ok($r += $parser->parse_file("t/latin1.xml"), "parse latin1 XML"); 24 23 25 #diag("r = $r"); 24 26 } libswish3/trunk/bindings/perl/t/10tokenize.t
r1923 r1927 7 7 BEGIN 8 8 { 9 use_ok('SWISH::3:: Parser');9 use_ok('SWISH::3::Analyzer'); 10 10 use_ok('SWISH::3::Constants'); 11 11 } 12 12 13 ok(my $ swish3 = SWISH::3::Parser->new(handler => sub { }), "new swish3");13 ok(my $analyzer = SWISH::3::Analyzer->new(), "new tokenizer"); 14 14 15 15 ok( 16 16 my $wlist = 17 $ swish3->tokenize(18 "now is the time, ain't it? or when else might it be!",19 'foo', 'bar', 100, 1, 13, 1420 ),17 $analyzer->tokenize( 18 "now is the time, ain't it? or when else might it be!", 19 13, 14, 'foo', 'bar' 20 ), 21 21 "wordlist" 22 22 ); … … 26 26 while (my $swishword = $wlist->next) 27 27 { 28 28 29 #diag('=' x 60); 29 30 for my $w (SWISH_WORD_FIELDS) 30 31 { 31 32 32 # diag(sprintf("%15s: %s\n", $w, $swishword->$w));33 # diag(sprintf("%15s: %s\n", $w, $swishword->$w)); 33 34 } 34 35 } libswish3/trunk/bindings/perl/typemap
r1923 r1927 9 9 swish_DocInfo * O_OBJECT 10 10 swish_Word * O_OBJECT 11 swish_Analyzer * O_OBJECT 12 swish_Parser * O_OBJECT 11 13 12 14 INPUT libswish3/trunk/doc/libswish3.3.pod
r1921 r1927 18 18 struct swish_Config 19 19 { 20 unsigned int ref_cnt; /* for script ing languages */21 void * stash; /* also for scripting languages */20 unsigned int ref_cnt; /* for script bindings */ 21 void * stash; /* for script bindings */ 22 22 xmlHashTablePtr conf; /* the meat */ 23 23 }; 24 25 24 26 25 struct swish_DocInfo … … 28 27 time_t mtime; 29 28 off_t size; 30 xmlChar *mime;31 xmlChar *encoding;32 xmlChar *uri;29 xmlChar *mime; 30 xmlChar
