Changeset 2161 for libswish3/trunk/bindings/perl/3.xs
- Timestamp:
- 09/20/08 01:05:54 (4 months ago)
- Files:
-
- libswish3/trunk/bindings/perl/3.xs (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/bindings/perl/3.xs
r2151 r2161 40 40 //sp_describe_object( (SV*)s3->stash ); 41 41 42 // hardcode this till we can get ENV var or similar 43 s3->analyzer->tokenlist = 0; 44 45 if (s3->analyzer->tokenlist) { 46 s3->analyzer->tokenizer = (&sp_tokenize3); 47 } 48 else { 49 s3->analyzer->tokenizer = (&sp_tokenize); 50 } 42 s3->analyzer->tokenizer = (&sp_tokenize3); 43 51 44 s3->analyzer->stash = sp_Stash_new(); 52 45 sp_Stash_set_char( s3->analyzer->stash, SELF_CLASS_KEY, ANALYZER_CLASS ); 46 53 47 s3->config->stash = sp_Stash_new(); 54 48 sp_Stash_set_char( s3->config->stash, SELF_CLASS_KEY, CONFIG_CLASS ); … … 357 351 # utility methods 358 352 353 void 354 describe(self, obj) 355 SV* self; 356 SV* obj; 357 358 CODE: 359 sp_describe_object(obj); 360 361 362 363 359 364 # tokenize() from Perl space uses same C func as tokenizer callback 360 swish_ WordList*365 swish_TokenIterator * 361 366 tokenize(self, str, ...) 362 SV* self;367 swish_3* self; 363 368 SV* str; 364 369 365 370 PREINIT: 366 371 char* CLASS; 367 swish_WordList* list; 368 xmlChar* metaname; 372 swish_TokenIterator* ti; 373 swish_TokenList* tl; 374 swish_MetaName* metaname; 375 xmlChar* meta; 369 376 xmlChar* context; 370 unsigned int word_pos;371 unsigned int offset;372 377 xmlChar* buf; 373 int numtokens;374 375 CODE:376 CLASS = WORDLIST_CLASS;377 list = swish_init_wordlist();378 list->ref_cnt++;379 meta name= (xmlChar*)SWISH_DEFAULT_METANAME;378 379 CODE: 380 CLASS = TOKENITERATOR_CLASS; 381 tl = swish_init_token_list(); 382 ti = swish_init_token_iterator(self->config, tl); 383 ti->ref_cnt++; 384 meta = (xmlChar*)SWISH_DEFAULT_METANAME; 380 385 context = (xmlChar*)SWISH_DEFAULT_METANAME; 381 word_pos = 0;382 offset = 0;383 386 buf = (xmlChar*)SvPV(str, PL_na); 384 387 385 388 // TODO reimplement as hashref arg 386 389 390 // TODO why this check?? 387 391 if (!SvUTF8(str)) 388 392 { … … 394 398 395 399 if ( items > 2 ) 396 { 397 word_pos = (int)SvIV(ST(2));398 400 { 401 meta = (xmlChar*)SvPV(ST(2), PL_na); 402 399 403 if ( items > 3 ) 400 offset = (int)SvIV(ST(3)); 401 402 if ( items > 4 ) 403 metaname = (xmlChar*)SvPV(ST(4), PL_na); 404 405 if ( items > 5 ) 406 context = (xmlChar*)SvPV(ST(5), PL_na); 407 408 //warn ("word_pos %d offset %d metaname %s context %s\n", word_pos, offset, metaname, context ); 409 410 } 404 context = (xmlChar*)SvPV(ST(3), PL_na); 405 406 //warn ("metaname %s context %s\n", metaname, context ); 407 408 } 409 410 metaname = swish_init_metaname(meta); 411 metaname->ref_cnt++; 411 412 412 numtokens = sp_tokenize( 413 (swish_3*)sp_extract_ptr(self), 414 buf, 415 list, 416 word_pos, 417 offset, 418 metaname, 419 context 420 ); 421 422 RETVAL = list; 423 /* TODO do we need to worry about free()ing metaname and context ?? */ 413 sp_tokenize3( self, buf, tl, metaname, context ); 414 415 RETVAL = ti; 416 424 417 425 418 OUTPUT: … … 428 421 429 422 430 # tokenize_isw() usesnative libswish3 tokenizer431 swish_ WordList*432 tokenize_ isw(self, str, ...)433 SV* self;423 # native libswish3 tokenizer 424 swish_TokenIterator * 425 tokenize_native(self, str, ...) 426 swish_3* self; 434 427 SV* str; 435 428 436 429 PREINIT: 437 430 char* CLASS; 438 swish_WordList* list; 439 xmlChar* metaname; 431 swish_TokenIterator* ti; 432 swish_TokenList* tl; 433 swish_MetaName* metaname; 434 xmlChar* meta; 440 435 xmlChar* context; 441 unsigned int word_pos;442 unsigned int offset;443 436 xmlChar* buf; 444 int numwords;445 446 CODE:447 CLASS = WORDLIST_CLASS;448 list = swish_init_wordlist();449 list->ref_cnt++;450 meta name= (xmlChar*)SWISH_DEFAULT_METANAME;451 context = (xmlChar*)SWISH_DEFAULT_METANAME;452 word_pos = 0;453 offset = 0;454 buf = (xmlChar*)SvPV(str, PL_na);455 437 438 CODE: 439 CLASS = TOKENITERATOR_CLASS; 440 tl = swish_init_token_list(); 441 ti = swish_init_token_iterator(self->config, tl); 442 ti->ref_cnt++; 443 meta = (xmlChar*)SWISH_DEFAULT_METANAME; 444 context = (xmlChar*)SWISH_DEFAULT_METANAME; 445 buf = (xmlChar*)SvPV(str, PL_na); 446 447 // TODO reimplement as hashref arg 448 456 449 if (!SvUTF8(str)) 457 450 { … … 463 456 464 457 if ( items > 2 ) 465 { 466 word_pos = (int)SvIV(ST(2));467 458 { 459 meta = (xmlChar*)SvPV(ST(2), PL_na); 460 468 461 if ( items > 3 ) 469 offset = (int)SvIV(ST(3)); 470 471 if ( items > 4 ) 472 metaname = (xmlChar*)SvPV(ST(4), PL_na); 473 474 if ( items > 5 ) 475 context = (xmlChar*)SvPV(ST(5), PL_na); 476 477 } 478 479 swish_init_words(); /* in case it wasn't initialized elsewhere... */ 480 numwords = swish_tokenize( 481 (swish_3*)sp_extract_ptr(self), 482 buf, 483 list, 484 word_pos, 485 offset, 486 metaname, 487 context 488 ); 489 490 RETVAL = list; 491 492 /* TODO do we need to worry about free()ing metaname and context ?? */ 462 context = (xmlChar*)SvPV(ST(3), PL_na); 463 464 //warn ("metaname %s context %s\n", metaname, context ); 465 466 } 467 468 metaname = swish_init_metaname(meta); 469 metaname->ref_cnt++; 470 471 swish_tokenize3( self, buf, tl, metaname, context ); 472 473 RETVAL = ti; 474 493 475 494 476 OUTPUT: … … 500 482 INCLUDE: XS/Config.xs 501 483 INCLUDE: XS/Analyzer.xs 502 INCLUDE: XS/WordList.xs503 INCLUDE: XS/Word.xs504 484 INCLUDE: XS/Doc.xs 505 485 INCLUDE: XS/Data.xs … … 511 491 INCLUDE: XS/xml2Hash.xs 512 492 INCLUDE: XS/Token.xs 513 493 INCLUDE: XS/TokenIterator.xs 494
