Changeset 1928
- Timestamp:
- 04/23/07 11:58:51 (1 year ago)
- Files:
-
- libswish3/trunk/bindings/perl/3.xs (modified) (23 diffs)
- libswish3/trunk/bindings/perl/lib/SWISH/3/Parser.pm (modified) (2 diffs)
- libswish3/trunk/bindings/perl/t/11get_set_parser.t (added)
- libswish3/trunk/src/libswish3/libswish3.h (modified) (6 diffs)
- libswish3/trunk/src/libswish3/parser.c (modified) (11 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/bindings/perl/3.xs
r1927 r1928 5 5 extern "C" { 6 6 #endif 7 #define PERL_NO_GET_CONTEXT 17 #define PERL_NO_GET_CONTEXT 8 8 #include "EXTERN.h" 9 9 #include "perl.h" … … 59 59 } \ 60 60 if (ix % 2 == 0) { \ 61 XPUSHs( sv_2mortal(RETVAL)); \61 XPUSHs( RETVAL ); \ 62 62 XSRETURN(1); \ 63 63 } \ … … 77 77 #define CONFIG_CLASS "SWISH::3::Config" 78 78 #define ANALYZER_CLASS "SWISH::3::Analyzer" 79 #define CONFIG_KEY "config" 80 #define ANALYZER_KEY "analyzer" 81 #define HANDLER_KEY "handler" 82 79 83 80 84 static HV * SubClasses = (HV*)NULL; … … 88 92 }; 89 93 90 static SV * callback_handler = (SV*)NULL;91 92 94 93 95 /* private functions */ 94 95 static void sp_remember_handler(SV* handler)96 {97 dTHX;98 if (callback_handler == (SV*)NULL)99 /* First time, so create a new SV */100 callback_handler = newSVsv(handler);101 else102 /* Been here before, so overwrite */103 SvSetSV(callback_handler, handler);104 }105 96 106 97 static void sp_make_subclasses( char * class ) … … 132 123 } 133 124 125 /* store SV* in a hash, incrementing its refcnt */ 126 static SV* 127 sp_hv_store( HV* h, const char* key, SV* val) 128 { 129 dTHX; 130 SV** ok; 131 ok = hv_store(h, key, strlen(key), SvREFCNT_inc(val), 0); 132 if (ok != NULL) 133 { 134 //warn("stored %s ok in hash: %s", key, SvPV( *ok, PL_na )); 135 } 136 else 137 { 138 croak("failed to store %s in hash", key); 139 } 140 return *ok; 141 } 142 143 /* fetch SV* from hash */ 144 static SV* 145 sp_hv_fetch( HV* h, const char* key ) 146 { 147 SV** ok; 148 ok = hv_fetch(h, key, strlen(key), 0); 149 if (ok != NULL) 150 { 151 //warn("fetched %s ok from hash: %s", key, SvPV( *ok, PL_na )); 152 } 153 else 154 { 155 croak("failed to fetch %s from hash", key); 156 } 157 return *ok; 158 } 159 160 /* delete SV* from hash, returning the deleted SV* */ 161 static SV* 162 sp_hv_delete( HV* h, const char* key ) 163 { 164 dTHX; 165 SV* oldval; 166 oldval = hv_delete(h, key, strlen(key), 0 ); 167 if (oldval != NULL) 168 { 169 //warn("deleted %s ok from hash: %s", key, SvPV( oldval, PL_na )); 170 } 171 else 172 { 173 croak("failed to delete %s from hash", key); 174 } 175 return oldval; 176 } 177 178 134 179 /* make a Perl blessed object from a C pointer */ 135 180 static SV * sp_ptr_to_object( char* CLASS, IV data ) … … 141 186 } 142 187 188 /* what class is an object blessed into ? */ 143 189 static char * sp_get_objects_class( SV* object ) 144 190 { … … 164 210 } 165 211 212 static void sp_dump_hash(SV* hash_ref) 213 { 214 HV* hash; 215 HE* hash_entry; 216 int num_keys, i; 217 SV* sv_key; 218 SV* sv_val; 219 int refcnt; 220 221 if (SvTYPE(SvRV(hash_ref))!=SVt_PVHV) 222 croak("hash_ref is not a hash reference"); 223 224 hash = (HV*)SvRV(hash_ref); 225 num_keys = hv_iterinit(hash); 226 for (i = 0; i < num_keys; i++) { 227 hash_entry = hv_iternext(hash); 228 sv_key = hv_iterkeysv(hash_entry); 229 sv_val = hv_iterval(hash, hash_entry); 230 refcnt = SvREFCNT(sv_val); 231 warn("%s => %s [%d]\n", SvPV(sv_key, PL_na), SvPV(sv_val, PL_na), refcnt); 232 } 233 return; 234 } 235 166 236 static void sp_describe_object( SV* object ) 167 237 { 238 dTHX; 168 239 warn("describing object\n"); 169 char * str = "foo"; //SvPV( object, PL_na );240 char * str = SvPV( object, PL_na ); 170 241 if (SvROK(object)) 171 242 { 172 if (SvTYPE(SvRV(object))==SVt_PVHV)243 if (SvTYPE(SvRV(object))==SVt_PVHV) 173 244 warn("%s is a magic blessed reference\n", str); 174 else if (SvTYPE(SvRV(object))==SVt_PVMG)245 else if (SvTYPE(SvRV(object))==SVt_PVMG) 175 246 warn("%s is a magic reference", str); 176 else if (SvTYPE(SvRV(object))==SVt_IV)247 else if (SvTYPE(SvRV(object))==SVt_IV) 177 248 warn("%s is a IV reference (pointer)", str); 178 else249 else 179 250 warn("%s is a reference of some kind", str); 180 251 } … … 187 258 188 259 } 260 Perl_sv_dump( object ); 261 Perl_sv_dump( (SV*)SvRV(object) ); 262 sp_dump_hash( object ); 189 263 } 190 264 … … 262 336 void sp_test_handler( swish_ParseData * parse_data ) 263 337 { 264 warn("handler called!\n"); 265 swish_debug_docinfo( parse_data->docinfo ); 266 swish_debug_wordlist( parse_data->wordlist ); 267 swish_debug_PropHash( parse_data->propHash ); 268 warn("\n"); 269 } 270 338 dTHX; 339 warn("handler called!\n"); 340 swish_debug_docinfo( parse_data->docinfo ); 341 swish_debug_wordlist( parse_data->wordlist ); 342 swish_debug_PropHash( parse_data->propHash ); 343 warn("\n"); 344 } 345 346 /* C wrapper for our Perl handler. 347 the parser object is passed in the parse_data stash. 348 we dereference it, pull out the SV* CODE ref, and execute 349 the Perl code. 350 */ 271 351 void sp_handler( swish_ParseData* parse_data ) 272 352 { … … 274 354 dSP; 275 355 276 char * class = sp_which_class("Data"); 277 SV * obj = sp_ptr_to_object(class, (IV)parse_data); 356 char* class = sp_which_class("Data"); 357 SV* obj = sp_ptr_to_object(class, (IV)parse_data); 358 swish_Parser* parser = (swish_Parser*)sp_ptr_from_object(parse_data->stash); 359 HV* stash = (HV*)SvRV((HV*)parser->stash); 360 SV* handler = sp_hv_fetch(stash, HANDLER_KEY); 278 361 279 362 PUSHMARK(SP); … … 281 364 PUTBACK; 282 365 283 call_sv( callback_handler, G_DISCARD);366 call_sv(handler, G_DISCARD); 284 367 } 285 368 … … 289 372 sp_tokenize(swish_Analyzer * analyzer, xmlChar * str, ...) 290 373 { 374 dTHX; 291 375 unsigned int wpos, offset, num_code_points; 292 376 xmlChar *meta, *ctxt; … … 464 548 PROTOTYPES: enable 465 549 466 550 # MUST call this before creating a SWISH::3::Config object 467 551 void 468 552 _init_swish(class) … … 480 564 SV * handler 481 565 566 # cache all the passed in objects in our stash 567 # and then just return them via accessors, rather 568 # than creating new objects each time. helps with ref_cnt sanity. 569 570 PREINIT: 571 HV* stash; 572 482 573 CODE: 574 stash = newHV(); 483 575 sp_make_subclasses(CLASS); 484 sp_remember_handler(handler); 576 sp_hv_store(stash, CONFIG_KEY, config); 577 sp_hv_store(stash, ANALYZER_KEY, analyzer); 578 sp_hv_store(stash, HANDLER_KEY, handler); 485 579 RETVAL = swish_init_parser( 486 580 (swish_Config*)sp_ptr_from_object(config), 487 581 (swish_Analyzer*)sp_ptr_from_object(analyzer), 488 582 &sp_handler, 489 NULL);583 (void*)newRV_inc((SV*)stash)); 490 584 491 RETVAL->config->ref_cnt++;492 RETVAL->analyzer->ref_cnt++;493 585 RETVAL->ref_cnt++; 586 //sp_describe_object(RETVAL->stash); 494 587 495 588 … … 502 595 DESTROY(self) 503 596 swish_Parser * self 597 598 PREINIT: 599 HV* stash; 504 600 505 601 CODE: 506 //warn("DESTROYing parser"); 507 self->config->ref_cnt--; 508 self->analyzer->ref_cnt--; 602 if (SWISH_DEBUG) 603 { 604 warn("DESTROYing parser %d", self); 605 warn("freeing parser stash"); 606 } 607 608 //sp_describe_object(self->stash); 609 stash = (HV*)SvRV((HV*)self->stash); 610 hv_undef(stash); 611 509 612 self->ref_cnt--; 510 613 if (self->ref_cnt < 1) 511 614 { 512 # check too for our config and analyzer 513 # and free them if necessary 514 # this is necessary because the Perl 515 # objects that init'd them may have already 516 # been destroyed. 517 //warn("config ref_cnt = %d", self->config->ref_cnt); 518 //warn("analyzer ref_cnt = %d", self->analyzer->ref_cnt); 519 if (self->config->ref_cnt < 1) 520 { 521 //warn("freeing config"); 522 swish_free_config(self->config); 523 } 524 if (self->analyzer->ref_cnt < 1) 525 { 526 //warn("freeing analyzer"); 527 swish_free_analyzer(self->analyzer); 528 } 529 //warn("freeing parser"); 615 if (SWISH_DEBUG) 616 warn("freeing parser %d", self); 617 530 618 swish_free_parser(self); 531 619 swish_cleanup(); … … 562 650 CODE: 563 651 file = SvPV(filename, PL_na); 564 652 SvREFCNT_inc(self); 653 565 654 # need to swap return values to make it Perlish 566 655 RETVAL = swish_parse_file( (swish_Parser*)sp_ptr_from_object(self), 567 656 (xmlChar*)file, 568 (void*) SvREFCNT_inc( self )657 (void*)self 569 658 ) 570 659 ? 0 571 660 : 1; 572 661 573 SvREFCNT_dec( self);662 SvREFCNT_dec(self); 574 663 575 664 OUTPUT: … … 586 675 587 676 CODE: 588 buf = SvPV(buffer, PL_na); 589 590 RETVAL = swish_parse_buffer((swish_Parser*)sp_ptr_from_object(self), 677 buf = SvPV(buffer, PL_na); 678 SvREFCNT_inc(self); 679 680 681 # need to swap return values to make it Perlish 682 RETVAL = swish_parse_file( (swish_Parser*)sp_ptr_from_object(self), 591 683 (xmlChar*)buf, 592 (void*) SvREFCNT_inc( self )593 ) 684 (void*)self 685 ) 594 686 ? 0 595 687 : 1; 596 688 689 SvREFCNT_dec(self); 597 690 598 691 OUTPUT: 599 692 RETVAL 600 693 601 694 602 695 # parser accessor/mutators 603 696 void … … 611 704 set_handler = 5 612 705 get_handler = 6 613 set_stash = 7 614 get_stash = 8 706 PREINIT: 707 HV* stash; 708 SV* oldval; 709 SV* newval; 710 swish_Config * conf; 711 swish_Analyzer * ana; 615 712 PPCODE: 616 713 { 714 stash = (HV*)SvRV((HV*)self->stash); 715 617 716 START_SET_OR_GET_SWITCH 618 717 619 case 1: self->config = (swish_Config*)sp_ptr_from_object(ST(1)); 718 case 1: 719 oldval = sp_hv_delete(stash, CONFIG_KEY); 720 conf = (swish_Config*)sp_ptr_from_object(oldval); 721 conf->ref_cnt--; 722 723 newval = sp_hv_store(stash, CONFIG_KEY, ST(1)); 724 self->config = (swish_Config*)sp_ptr_from_object(newval); 620 725 break; 621 726 622 case 2: RETVAL = sp_ptr_to_object(CONFIG_CLASS, (IV)self->config); 623 self->config->ref_cnt++; 727 case 2: RETVAL = sp_hv_fetch(stash, CONFIG_KEY); 624 728 break; 625 729 626 case 3: self->analyzer = (swish_Analyzer*)sp_ptr_from_object(ST(1)); 730 case 3: 731 oldval = sp_hv_delete(stash, ANALYZER_KEY); 732 ana = (swish_Analyzer*)sp_ptr_from_object(oldval); 733 ana->ref_cnt--; 734 735 newval = sp_hv_store(stash, ANALYZER_KEY, ST(1)); 736 self->analyzer = (swish_Analyzer*)sp_ptr_from_object(newval); 627 737 break; 628 738 629 case 4: RETVAL = sp_ptr_to_object(ANALYZER_CLASS, (IV)self->analyzer); 630 self->analyzer->ref_cnt++; 739 case 4: RETVAL = sp_hv_fetch(stash, ANALYZER_KEY); 631 740 break; 632 741 633 case 5: sp_remember_handler(ST(1)); 742 case 5: 743 oldval = sp_hv_delete(stash, HANDLER_KEY); 744 sp_hv_store(stash, HANDLER_KEY, ST(1)); 634 745 break; 635 746 636 case 6: RETVAL = callback_handler; 637 break; 638 639 case 7: self->stash = (void*)SvREFCNT_inc( ST(1) ); 640 break; 641 642 case 8: RETVAL = (SV*)self->stash; 747 case 6: RETVAL = sp_hv_fetch(stash, HANDLER_KEY); 643 748 break; 644 749 … … 852 957 853 958 PROTOTYPES: enable 959 854 960 855 961 SV* … … 858 964 859 965 CODE: 860 RETVAL = self-> user_data;966 RETVAL = self->stash; 861 967 862 968 OUTPUT: … … 869 975 870 976 PREINIT: 871 char* CLASS = "SWISH::3::Config";977 char* CLASS = CONFIG_CLASS; 872 978 873 979 CODE: … … 1036 1142 1037 1143 CODE: 1038 //warn("DESTROYing swish_Config object"); 1144 if (SWISH_DEBUG) 1145 { 1146 warn("DESTROYing swish_Config object %s [%d] [ref_cnt = %d]", 1147 SvPV(ST(0), PL_na), self, self->ref_cnt); 1148 } 1149 1039 1150 self->ref_cnt--; 1040 1151 if (self->ref_cnt < 1) 1041 1152 { 1042 //warn("freeing swish_Config struct"); 1153 if (SWISH_DEBUG) 1154 warn("freeing swish_Config %d", self); 1155 1043 1156 swish_free_config(self); 1044 1157 } … … 1072 1185 1073 1186 CODE: 1074 //warn("DESTROYing analyzer"); 1187 if (SWISH_DEBUG) 1188 { 1189 warn("DESTROYing swish_Analyzer object %s [%d] [ref_cnt = %d]", 1190 SvPV(ST(0), PL_na), self, self->ref_cnt); 1191 } 1192 1075 1193 self->ref_cnt--; 1076 1194 if (self->ref_cnt < 1) 1077 1195 { 1078 //warn("freeing analyzer"); 1196 if(SWISH_DEBUG) 1197 warn("freeing swish_Analyzer %d", self); 1198 1079 1199 swish_free_analyzer(self); 1080 1200 } libswish3/trunk/bindings/perl/lib/SWISH/3/Parser.pm
r1927 r1928 20 20 sub new 21 21 { 22 my $proto = shift;23 my $class = ref($proto) || $proto;22 my $proto = shift; 23 my $class = ref($proto) || $proto; 24 24 $class->_init_swish; 25 my %args = @_; 26 my $config = SWISH::3::Config->new; 27 if ($args{config}) 25 my %args = @_; 26 if ( $args{config} 27 && ref($args{config}) 28 && $args{config}->isa('SWISH::3::Config')) 28 29 { 29 $config->add($args{config}); 30 31 # do nothing 30 32 } 31 $args{analyzer} ||= SWISH::3::Analyzer->new(config => $config); 33 elsif ($args{config}) 34 { 35 my $c = SWISH::3::Config->new; 36 $c->add($args{config}); 37 $args{config} = $c; 38 } 39 else 40 { 41 $args{config} = SWISH::3::Config->new; 42 } 43 $args{analyzer} ||= SWISH::3::Analyzer->new(config => $args{config}); 32 44 unless ($args{handler}) 33 45 { … … 36 48 $args{handler} = \&SWISH::3::Parser::Doc::handler; 37 49 } 38 my $self = $class->_init($ config, $args{analyzer}, $args{handler});50 my $self = $class->_init($args{config}, $args{analyzer}, $args{handler}); 39 51 return $self; 40 52 } libswish3/trunk/src/libswish3/libswish3.h
r1927 r1928 205 205 struct swish_Config 206 206 { 207 unsignedint ref_cnt; /* for scripting languages */207 int ref_cnt; /* for scripting languages */ 208 208 void * stash; /* also for scripting languages */ 209 209 xmlHashTablePtr conf; /* the meat */ … … 212 212 struct swish_ConfigValue 213 213 { 214 unsigned intref_cnt;214 int ref_cnt; 215 215 unsigned int multi; /* indicates whether value is a string or hashref */ 216 216 unsigned int equal; /* indicates whether key/value pairs are equal */ … … 317 317 unsigned int tokenize; // should we parse into WordList 318 318 swish_WordList* (*tokenizer) (swish_Analyzer*, xmlChar*, ...); 319 xmlChar *(*stemmer) (xmlChar*);319 xmlChar* (*stemmer) (xmlChar*); 320 320 unsigned int lc; // should tokens be lowercased 321 void *stash; // for script bindings322 void *regex; // optional regex323 unsignedint ref_cnt; // for script bindings321 void *stash; // for script bindings 322 void *regex; // optional regex 323 int ref_cnt; // for script bindings 324 324 }; 325 325 326 326 struct swish_Parser 327 327 { 328 unsigned intref_cnt; // for script bindings328 int ref_cnt; // for script bindings 329 329 swish_Config *config; // config object 330 330 swish_Analyzer *analyzer; // analyzer object … … 333 333 }; 334 334 335 335 // TODO maybe store swish_Parser * here instead of separate config and analyzer 336 336 struct swish_ParseData 337 337 { 338 338 xmlBufferPtr buf_ptr; // text buffer 339 339 xmlBufferPtr prop_buf; // Property buffer 340 xmlChar *tag; // current tag name341 swish_DocInfo *docinfo; // document-specific properties342 swish_Config *config; // global config340 xmlChar *tag; // current tag name 341 swish_DocInfo *docinfo; // document-specific properties 342 swish_Config *config; // global config 343 343 unsigned int no_index; // toggle flag for special comments 344 344 unsigned int is_html; // shortcut flag for html parser … … 352 352 xmlHashTablePtr propHash; // hash of Props, one for each property 353 353 swish_Analyzer *analyzer; // Analyzer struct 354 void * user_data; // for script bindings354 void *stash; // for script bindings 355 355 }; 356 356 … … 360 360 swish_Parser * swish_init_parser( swish_Config * config, 361 361 swish_Analyzer * analyzer, 362 void (* func) (swish_ParseData *),362 void (*handler) (swish_ParseData *), 363 363 void *stash 364 364 ); 365 void swish_free_parser( swish_Parser * parser );365 void swish_free_parser( swish_Parser * parser ); 366 366 int swish_parse_file( swish_Parser * parser, 367 367 xmlChar *filename, 368 void * user_data);368 void * stash ); 369 369 int swish_parse_stdin( swish_Parser * parser, 370 void * user_data);370 void * stash ); 371 371 int swish_parse_buffer( swish_Parser * parser, 372 372 xmlChar * buf, 373 void * user_data);373 void * stash ); 374 374 375 375 libswish3/trunk/src/libswish3/parser.c
r1927 r1928 123 123 124 124 static swish_ParseData * 125 init_parse_data(swish_Config * config, swish_Analyzer * analyzer, void * user_data);125 init_parse_data(swish_Config * config, swish_Analyzer * analyzer, void * stash); 126 126 static void free_parse_data(swish_ParseData * parse_data); 127 127 … … 607 607 /* SAX2 callback */ 608 608 static void 609 myerr(void *user_data, xmlChar * msg, ...)609 myerr(void *user_data, xmlChar * msg, ...) 610 610 { 611 611 if (!SWISH_PARSER_ERROR) … … 632 632 /* SAX2 callback */ 633 633 static void 634 mywarn(void *user_data, xmlChar * msg, ...)634 mywarn(void *user_data, xmlChar * msg, ...) 635 635 { 636 636 if (!SWISH_PARSER_WARNING) … … 754 754 755 755 static swish_ParseData * 756 init_parse_data(swish_Config * config, swish_Analyzer * analyzer, void * user_data)756 init_parse_data(swish_Config * config, swish_Analyzer * analyzer, void * stash) 757 757 { 758 758 … … 762 762 swish_ParseData *ptr = (swish_ParseData *) swish_xmalloc(sizeof(swish_ParseData)); 763 763 764 ptr-> user_data = user_data;764 ptr->stash = stash; 765 765 766 766 ptr->buf_ptr = xmlBufferCreateSize(SWISH_BUFFER_CHUNK_SIZE); … … 1203 1203 swish_parse_stdin( 1204 1204 swish_Parser * parser, 1205 void * user_data1205 void * stash 1206 1206 ) 1207 1207 { … … 1259 1259 /* blank line indicates body */ 1260 1260 curTime = swish_time_elapsed(); 1261 parse_data = init_parse_data(parser->config, parser->analyzer, user_data);1261 parse_data = init_parse_data(parser->config, parser->analyzer, stash); 1262 1262 head = buf_to_head( (xmlChar*)xmlBufferContent(head_buf) ); 1263 1263 parse_data->docinfo = head_to_docinfo(head); … … 1381 1381 swish_Parser * parser, 1382 1382 xmlChar * buf, 1383 void * user_data1383 void * stash 1384 1384 ) 1385 1385 { … … 1396 1396 swish_debug_msg("number of headlines: %d", head->nlines); 1397 1397 1398 swish_ParseData *parse_data = init_parse_data(parser->config, parser->analyzer, user_data);1398 swish_ParseData *parse_data = init_parse_data(parser->config, parser->analyzer, stash); 1399 1399 parse_data->docinfo = head_to_docinfo(head); 1400 1400 swish_check_docinfo(parse_data->docinfo, parser->config); … … 1439 1439 swish_Parser * parser, 1440 1440 xmlChar * filename, 1441 void * user_data1441 void * stash 1442 1442 ) 1443 1443 { … … 1446 1446 char *etime; 1447 1447 1448 swish_ParseData *parse_data = init_parse_data(parser->config, parser->analyzer, user_data);1448 swish_ParseData *parse_data = init_parse_data(parser->config, parser->analyzer, stash); 1449 1449 parse_data->docinfo = swish_init_docinfo(); 1450 1450
