Changeset 2112 for libswish3/trunk/src/xapian
- Timestamp: 04/07/08 21:48:43 (8 months ago)
- Files:
  - libswish3/trunk/src/xapian/swish_xapian.cpp (modified) (23 diffs)
  - libswish3/trunk/src/xapian/test.pl (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/src/xapian/swish_xapian.cpp
r2111 r2112 73 73 /* global vars */ 74 74 static int debug = 0; 75 static Xapian::WritableDatabase wdb; 76 static Xapian::Database::Database rdb; 77 static Xapian::Stem stemmer("english"); // TODO make this configurable 78 static Xapian::TermGenerator indexer; 79 static int twords = 0; 80 static int skip_duplicates = 0; 81 static int overwrite = 0; 82 static vector < bool > updated; 83 static swish_3 *s3; 84 85 extern int SWISH_DEBUG; 75 static 76 Xapian::WritableDatabase 77 wdb; 78 static 79 Xapian::Database::Database 80 rdb; 81 static 82 Xapian::Stem 83 stemmer( 84 "english" 85 ); // TODO make this configurable 86 static 87 Xapian::TermGenerator 88 indexer; 89 static int 90 twords = 0; 91 static int 92 skip_duplicates = 0; 93 static int 94 overwrite = 0; 95 static 96 vector < 97 bool > 98 updated; 99 static swish_3 * 100 s3; 101 102 extern int 103 SWISH_DEBUG; 86 104 87 105 static struct option … … 183 201 } 184 202 185 inline uint32_t 203 inline 204 uint32_t 186 205 binary_string_to_int( 187 206 const std::string & s … … 190 209 if (s.size() != 4) 191 210 return (uint32_t) - 1; 192 uint32_t 193 v; 211 uint32_t v; 194 212 memcpy(&v, s.data(), 4); 195 213 return ntohl(v); 196 214 } 197 215 198 inline 199 std::string 216 inline std::string 200 217 int_to_binary_string( 201 218 uint32_t v … … 206 223 } 207 224 208 static string 225 static 226 string 209 227 get_prefix( 210 228 xmlChar *metaname, … … 212 230 ) 213 231 { 214 string prefix; 215 swish_MetaName *meta = 216 (swish_MetaName *)swish_hash_fetch(config->metanames, metaname); 232 string 233 prefix; 234 swish_MetaName * 235 meta = (swish_MetaName *)swish_hash_fetch(config->metanames, metaname); 217 236 prefix = int_to_string(meta->id); 218 return prefix + string((const char *)":");237 return prefix + string((const char *)":"); 219 238 } 220 239 221 240 static void 222 241 add_prefix( 223 swish_MetaName *meta,242 swish_MetaName *meta, 224 243 Xapian::QueryParser qp, 225 xmlChar *name226 ) 227 { 228 
qp.add_prefix(string((const char *)name),229 int_to_string(meta->id) + string((const char*)":"));244 xmlChar *name 245 ) 246 { 247 qp.add_prefix(string((const char *)name), 248 int_to_string(meta->id) + string((const char *)":")); 230 249 } 231 250 … … 236 255 ) 237 256 { 238 unsigned int w; 239 swish_MetaName *meta = 240 (swish_MetaName *)swish_hash_fetch(config->metanames, metaname); 257 unsigned int 258 w; 259 swish_MetaName * 260 meta = (swish_MetaName *)swish_hash_fetch(config->metanames, metaname); 241 261 return meta->bias > 0 ? meta->bias : 1; // TODO need to account for negative values. 242 262 } … … 250 270 { 251 271 // lookup weight and prefix 252 string prefix = get_prefix(metaname, (swish_Config *)config); 253 unsigned int weight = get_weight(metaname, (swish_Config *)config); 272 string 273 prefix = get_prefix(metaname, (swish_Config *)config); 274 unsigned int 275 weight = get_weight(metaname, (swish_Config *)config); 254 276 indexer.index_text((const char *)xmlBufferContent(buffer), weight, prefix); 277 // index swishdefault and swishtitle without any prefix too 278 if (xmlStrEqual(metaname, BAD_CAST SWISH_DEFAULT_METANAME) 279 || xmlStrEqual(metaname, BAD_CAST SWISH_TITLE_METANAME) 280 ) { 281 indexer.index_text((const char *)xmlBufferContent(buffer), weight); 282 } 255 283 } 256 284 … … 262 290 ) 263 291 { 264 swish_Property *prop; 292 swish_Property * 293 prop; 265 294 prop = (swish_Property *)swish_hash_fetch(s3->config->properties, name); 266 295 //SWISH_DEBUG_MSG("adding property %s [%d]: %s", name, prop->id, … … 285 314 } 286 315 if (SWISH_DEBUG & SWISH_DEBUG_NAMEDBUFFER) { 287 swish_debug_nb(parser_data->properties, (xmlChar *)"Property");288 swish_debug_nb(parser_data->metanames, (xmlChar *)"MetaName");316 swish_debug_nb(parser_data->properties, BAD_CAST "Property"); 317 swish_debug_nb(parser_data->metanames, BAD_CAST "MetaName"); 289 318 } 290 319 291 320 // Put the data in the document 292 321 Xapian::Document newdocument; 293 xmlChar * 
title =294 (xmlChar *)swish_nb_get_value(parser_data->properties,295 (xmlChar *)SWISH_PROP_TITLE);322 xmlChar * 323 title = BAD_CAST swish_nb_get_value(parser_data->properties, 324 BAD_CAST SWISH_PROP_TITLE); 296 325 //printf("title = %s", (char *)title); 297 string unique_id = SWISH_PREFIX_URL + string((const char *)parser_data->docinfo->uri); 298 string record = "url=" + string((const char *)parser_data->docinfo->uri); 326 string 327 unique_id = SWISH_PREFIX_URL + string((const char *)parser_data->docinfo->uri); 328 string 329 record = "url=" + string((const char *)parser_data->docinfo->uri); 299 330 record += "\ntitle=" + string((const char *)title); 300 331 record += "\ntype=" + string((const char *)parser_data->docinfo->mime); … … 310 341 newdocument.add_term(unique_id); 311 342 312 struct tm *tm = localtime(&(parser_data->docinfo->mtime)); 313 string date_term = 314 "D" + date_to_string(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday); 343 struct tm * 344 tm = localtime(&(parser_data->docinfo->mtime)); 345 string 346 date_term = "D" + date_to_string(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday); 315 347 newdocument.add_term(date_term); // Date (YYYYMMDD) 316 348 date_term.resize(7); … … 339 371 // add all metanames and properties 340 372 xmlHashScan(parser_data->metanames->hash, (xmlHashScanner)add_metanames, s3->config); 341 xmlHashScan(parser_data->properties->hash, (xmlHashScanner)add_properties, 342 &newdocument); 373 xmlHashScan(parser_data->properties->hash, (xmlHashScanner)add_properties, &newdocument); 343 374 344 375 if (!skip_duplicates) { … … 373 404 ) 374 405 { 375 int exitcode = 1; 376 string header; 406 int 407 exitcode = 1; 408 string 409 header; 377 410 try { 378 411 if (!overwrite) { … … 388 421 389 422 indexer.set_stemmer(stemmer); 390 423 391 424 // read header if it exists 392 header = dbpath +393 string((const char*)SWISH_PATH_SEP) +394 string((const char*)SWISH_HEADER_FILE);395 if (swish_file_exists( (xmlChar*)header.c_str())) {396 
swish_merge_config_with_header((char *)header.c_str(), s3->config);425 header = 426 dbpath + string((const char *)SWISH_PATH_SEP) + 427 string((const char *)SWISH_HEADER_FILE); 428 if (swish_file_exists(BAD_CAST header.c_str())) { 429 swish_merge_config_with_header((char *)header.c_str(), s3->config); 397 430 } 398 431 … … 426 459 ) 427 460 { 428 int exitcode = 1; 429 string header; 461 int 462 exitcode = 1; 463 string 464 header; 430 465 try { 431 466 rdb = Xapian::Database::Database(dbpath); 432 433 header = dbpath +434 string((const char*)SWISH_PATH_SEP) +435 string((const char*)SWISH_HEADER_FILE);436 if (swish_file_exists( (xmlChar*)header.c_str())) {437 swish_merge_config_with_header((char *)header.c_str(), s3->config);467 468 header = 469 dbpath + string((const char *)SWISH_PATH_SEP) + 470 string((const char *)SWISH_HEADER_FILE); 471 if (swish_file_exists(BAD_CAST header.c_str())) { 472 swish_merge_config_with_header((char *)header.c_str(), s3->config); 438 473 } 439 474 … … 464 499 ) 465 500 { 466 int total_matches; 467 Xapian::Enquire *enquire; 468 Xapian::Query query; 501 int 502 total_matches; 503 Xapian::Enquire * enquire; 504 Xapian::Query query; 469 505 Xapian::QueryParser qparser; 470 Xapian::MSet mset;506 Xapian::MSet mset; 471 507 Xapian::MSetIterator iterator; 472 Xapian::Document doc;473 508 Xapian::Document doc; 509 474 510 total_matches = 0; 475 qparser.set_stemmer(stemmer); // TODO make this configurable511 qparser.set_stemmer(stemmer); // TODO make this configurable 476 512 qparser.set_database(rdb); 477 513 478 514 // map all human metanames to internal prefix 479 515 xmlHashScan(s3->config->metanames, (xmlHashScanner)add_prefix, &qparser); 480 516 481 517 // TODO boolean_prefix? 
482 518 483 519 try { 484 520 query = qparser.parse_query(string(qstr)); 485 521 } 486 catch (Xapian::QueryParserError &e) {522 catch(Xapian::QueryParserError & e) { 487 523 SWISH_CROAK("query parser error: %s", e.get_msg().c_str()); 488 524 } 489 525 526 // this is very simplistic. swish-e does paging etc. 490 527 enquire = new Xapian::Enquire(rdb); 491 528 enquire->set_query(query); 492 529 mset = enquire->get_mset(0, 100); 530 printf("# %d estimated matches\n", mset.get_matches_estimated()); 531 cout << "# " + query.get_description() << endl; 493 532 iterator = mset.begin(); 494 for ( ; iterator != mset.end(); ++iterator) { 533 534 // output format is simple, not as flexible as swish-e. 535 // But hey. It's an example. 536 for (; iterator != mset.end(); ++iterator) { 495 537 doc = iterator.get_document(); 496 printf("ID %d %d%%\n[\n%s\n]\n", 497 iterator.operator*(), iterator.get_percent(), doc.get_data().c_str()); 538 printf("%3d0 %s \"%s\" %s\n", iterator.get_percent(), 539 doc.get_value(SWISH_PROP_DOCPATH_ID).c_str(), 540 doc.get_value(SWISH_PROP_TITLE_ID).c_str(), 541 doc.get_value(SWISH_PROP_SIZE_ID).c_str() 542 ); 498 543 total_matches++; 499 544 } 500 printf("%d total matches\n", total_matches); 545 546 //printf("# %d total matches\n", total_matches); 501 547 } 502 548 … … 506 552 { 507 553 508 char *descr = "swish_xapian is an example program for using libswish3 with Xapian\n"; 554 char * 555 descr = "swish_xapian is an example program for using libswish3 with Xapian\n"; 509 556 printf("swish_xapian [opts] [- | file(s)]\n"); 510 557 printf("opts:\n --config conf_file.xml\n --query <query>\n --debug [lvl]\n --help\n"); … … 520 567 ) 521 568 { 522 int i, ch; 523 extern char *optarg; 524 extern int optind; 525 int option_index; 526 int files; 527 char *etime; 528 char *query; 529 char *dbpath; 530 string header; 531 double start_time; 532 xmlChar *config_file; 533 569 int 570 i, 571 ch; 572 extern char * 573 optarg; 574 extern int 575 optind; 576 int 577 
option_index; 578 int 579 files; 580 char * 581 etime; 582 char * 583 query; 584 char * 585 dbpath; 586 string 587 header; 588 double 589 start_time; 590 xmlChar * 591 config_file; 592 534 593 config_file = NULL; 535 594 option_index = 0; … … 554 613 case 'c': 555 614 //printf("optarg = %s\n", optarg); 556 config_file = swish_xstrdup( (xmlChar *)optarg);615 config_file = swish_xstrdup(BAD_CAST optarg); 557 616 break; 558 617 … … 571 630 572 631 case 'i': 573 dbpath = (char *)swish_xstrdup( (xmlChar *)optarg);632 dbpath = (char *)swish_xstrdup(BAD_CAST optarg); 574 633 break; 575 634 … … 579 638 580 639 case 'q': 581 query = (char *)swish_xstrdup( (xmlChar *)optarg);640 query = (char *)swish_xstrdup(BAD_CAST optarg); 582 641 break; 583 642 … … 611 670 612 671 if (!dbpath) { 613 dbpath = (char *)swish_xstrdup( (xmlChar *)SWISH_INDEX_FILENAME);672 dbpath = (char *)swish_xstrdup(BAD_CAST SWISH_INDEX_FILENAME); 614 673 } 615 674 616 675 // indexing mode 617 676 if (!query) { 618 677 619 678 open_writeable_index(dbpath); 620 679 621 680 for (; i < argc; i++) { 622 681 if (argv[i][0] != '-') { … … 641 700 // but also if it is not (defaults). 642 701 // so we re-write every time we have a writeable db. 643 header = dbpath +644 string((const char*)SWISH_PATH_SEP) +645 string((const char*)SWISH_HEADER_FILE);646 swish_write_header((char *)header.c_str(), s3->config);702 header = 703 dbpath + string((const char *)SWISH_PATH_SEP) + 704 string((const char *)SWISH_HEADER_FILE); 705 swish_write_header((char *)header.c_str(), s3->config); 647 706 648 707 } … … 656 715 657 716 etime = swish_print_time(swish_time_elapsed() - start_time); 658 printf(" %s total time\n\n", etime);717 printf("# %s total time\n\n", etime); 659 718 swish_xfree(etime); 660 719 swish_xfree(dbpath); libswish3/trunk/src/xapian/test.pl
r2111 r2112 11 11 12 12 # searching 13 ok( ( grep {m/2 totalmatches/} run(' --query swishtitle:foobar') ),13 ok( ( grep {m/2 estimated matches/} run(' --query swishtitle:foobar') ), 14 14 'search swishtitle:foobar' ); 15 15
