Changeset 2108
- Timestamp:
- 03/31/08 23:47:51 (2 months ago)
- Files:
-
- libswish3/trunk/src/libswish3/config.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/hash.c (modified) (2 diffs)
- libswish3/trunk/src/libswish3/header.c (modified) (39 diffs)
- libswish3/trunk/src/libswish3/io.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/libswish3.h (modified) (6 diffs)
- libswish3/trunk/src/libswish3/parser.c (modified) (26 diffs)
- libswish3/trunk/src/xapian/swish_xapian.cpp (modified) (15 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/src/libswish3/config.c
r2106 r2108 37 37 extern int SWISH_DEBUG; 38 38 39 void swish_free_config(40 swish_Config *config41 );42 swish_Config *swish_init_config(43 );44 void swish_config_set_default(45 swish_Config *config46 );47 swish_Config *swish_add_config(48 xmlChar *conf,49 swish_Config *config50 );51 swish_Config *swish_parse_config(52 xmlChar *conf,53 swish_Config *config54 );55 void swish_debug_config(56 swish_Config *config57 );58 void swish_config_merge(59 swish_Config *config1,60 swish_Config *config261 );62 39 static void free_string( 63 40 xmlChar *payload, libswish3/trunk/src/libswish3/hash.c
r2103 r2108 71 71 ) 72 72 { 73 if (SWISH_DEBUG > 10)74 printf("freeing %s from hash key %s\n", (xmlChar *)val, key);75 76 73 swish_xfree(val); 77 74 } … … 90 87 91 88 return ret; 89 } 90 91 void 92 swish_hash_free( 93 xmlHashTablePtr hash 94 ) 95 { 96 xmlHashFree(hash, (xmlHashDeallocator)free_hashval); 92 97 } 93 98 libswish3/trunk/src/libswish3/header.c
r2106 r2108 108 108 xmlChar *name 109 109 ); 110 static void 111 test_meta_unique_ids( 110 static void test_meta_unique_ids( 112 111 swish_MetaName *meta, 113 112 swish_Config *c, 114 113 xmlChar *name 115 114 ); 116 static void 117 test_prop_unique_ids( 115 static void test_prop_unique_ids( 118 116 swish_Property *prop, 119 117 swish_Config *c, … … 269 267 270 268 meta = 271 swish_init_metaname(swish_str_tolower 272 ((xmlChar *)xmlTextReaderConstName(reader))); 269 swish_init_metaname(swish_str_tolower((xmlChar *)xmlTextReaderConstName(reader))); 273 270 meta->ref_cnt++; 274 271 value = NULL; … … 369 366 370 367 if (xmlStrEqual(attr, (xmlChar *)"ignore_case")) { 371 prop->ignore_case = 372 (boolean) strtol((char *)attr_val, (char **)NULL, 10); 368 prop->ignore_case = (boolean) strtol((char *)attr_val, (char **)NULL, 10); 373 369 } 374 370 else if (xmlStrEqual(attr, (xmlChar *)"max")) { … … 414 410 415 411 prop = 416 swish_init_property(swish_str_tolower 417 ((xmlChar *)xmlTextReaderConstName(reader))); 412 swish_init_property(swish_str_tolower((xmlChar *)xmlTextReaderConstName(reader))); 418 413 prop->ref_cnt++; 419 414 value = NULL; … … 521 516 } 522 517 518 //SWISH_DEBUG_MSG("END ELEMENT name %s type %d value %s", name, type, value); 519 523 520 return; 524 521 … … 549 546 return; 550 547 } 548 549 //SWISH_DEBUG_MSG("NOT END ELEMENT name %s type %d value %s", name, type, value); 551 550 } 552 551 … … 574 573 } 575 574 else if (h->isalias) { 576 read_key_values_pair(reader, h->config->tag_aliases, 577 (xmlChar *)name); 575 read_key_values_pair(reader, h->config->tag_aliases, (xmlChar *)name); 578 576 return; 579 577 } … … 582 580 return; 583 581 } 582 583 /* 584 SWISH_DEBUG_MSG("STILL NOT END ELEMENT name %s type %d value %s", name, type, value); 585 */ 584 586 585 587 } … … 608 610 609 611 for (i = 0; i < strlist->n; i++) { 610 /* SWISH_DEBUG_MSG("key_values pair: %s -> %s", strlist->word[i], name);*/612 /* SWISH_DEBUG_MSG("key_values pair: %s -> %s", strlist->word[i], name); */ 611 613 if (swish_hash_exists(hash, strlist->word[i])) { 612 swish_hash_replace(hash, strlist->word[i], 613 swish_xstrdup(name)); 614 swish_hash_replace(hash, strlist->word[i], swish_xstrdup(name)); 614 615 } 615 616 else { … … 645 646 if (xmlTextReaderNodeType(reader) == XML_READER_TYPE_TEXT) { 646 647 value = xmlTextReaderConstValue(reader); 647 /* SWISH_DEBUG_MSG("read key %s for value %s", name, value);*/648 /* SWISH_DEBUG_MSG("read key %s for value %s", name, value); */ 648 649 if (swish_hash_exists(hash, name)) { 649 650 swish_hash_replace(hash, name, swish_xstrdup(value)); … … 678 679 if (stat((char *)filename, &fileinfo)) { 679 680 reader = 680 xmlReaderForMemory((const char *)filename, 681 xmlStrlen((xmlChar *)filename), "[ swish.xml ]", 682 NULL, 0); 681 xmlReaderForMemory((const char *)filename, xmlStrlen((xmlChar *)filename), 682 "[ swish.xml ]", NULL, 0); 683 683 684 684 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) { … … 722 722 ) 723 723 { 724 if (meta->alias_for != NULL 725 && !swish_hash_exists(c->metanames, meta->alias_for) 724 if (meta->alias_for != NULL && !swish_hash_exists(c->metanames, meta->alias_for) 726 725 ) { 727 726 SWISH_CROAK 728 ("MetaName %s has alias_for value of %sbut no such MetaName defined",727 ("MetaName '%s' has alias_for value of '%s' but no such MetaName defined", 729 728 name, meta->alias_for); 730 729 } … … 738 737 ) 739 738 { 740 if (prop->alias_for != NULL 741 && !swish_hash_exists(c->properties, prop->alias_for) 739 if (prop->alias_for != NULL && !swish_hash_exists(c->properties, prop->alias_for) 742 740 ) { 743 741 SWISH_CROAK 744 ("Property %s has alias_for value of %sbut no such Property defined",742 ("Property '%s' has alias_for value of '%s' but no such Property defined", 745 743 name, prop->alias_for); 746 744 } … … 752 750 ) 753 751 { 754 xmlHashScan(c->metanames, (xmlHashScanner) test_meta_alias_for, c);755 xmlHashScan(c->properties, (xmlHashScanner) test_prop_alias_for, c);752 xmlHashScan(c->metanames, (xmlHashScanner)test_meta_alias_for, c); 753 xmlHashScan(c->properties, (xmlHashScanner)test_prop_alias_for, c); 756 754 } 757 755 … … 763 761 ) 764 762 { 765 c->flags->meta_ids[ meta->id]++;763 c->flags->meta_ids[meta->id]++; 766 764 } 767 765 … … 773 771 ) 774 772 { 775 c->flags->prop_ids[ prop->id]++;773 c->flags->prop_ids[prop->id]++; 776 774 } 777 775 … … 782 780 { 783 781 int i; 784 xmlHashScan(c->metanames, (xmlHashScanner)test_meta_unique_ids, c);785 xmlHashScan(c->properties, (xmlHashScanner) test_prop_unique_ids, c);786 for (i=0; i<SWISH_MAX_IDS; i++) {782 xmlHashScan(c->metanames, (xmlHashScanner)test_meta_unique_ids, c); 783 xmlHashScan(c->properties, (xmlHashScanner)test_prop_unique_ids, c); 784 for (i = 0; i < SWISH_MAX_IDS; i++) { 787 785 if (c->flags->meta_ids[i] > 1) { 788 786 SWISH_WARN("meta id %d == %d", i, c->flags->meta_ids[i]); … … 791 789 SWISH_WARN("prop id %d == %d", i, c->flags->prop_ids[i]); 792 790 } 793 794 /* set back to 0 in case we are called again */ 791 792 /* 793 set back to 0 in case we are called again 794 */ 795 795 c->flags->prop_ids[i] = 0; 796 796 c->flags->meta_ids[i] = 0; 797 } 797 } 798 798 } 799 799 … … 809 809 h->isprops = 0; 810 810 h->ismetas = 0; 811 h->isindex = 0; 812 h->isalias = 0; 813 h->isparser = 0; 814 h->ismime = 0; 811 815 h->parent_name = NULL; 812 816 h->prop_id = SWISH_PROP_THIS_MUST_COME_LAST_ID; … … 823 827 h = init_headmaker(); 824 828 read_header(filename, h); 825 829 826 830 /* test that all the alias_for links resolve ok */ 827 831 swish_config_test_alias_fors(h->config); 828 832 829 833 /* make sure ids are all unique */ 830 834 swish_config_test_unique_ids(h->config); … … 857 861 /* test that all the alias_for links resolve ok */ 858 862 swish_config_test_alias_fors(c); 859 863 860 864 /* make sure ids are all unique */ 861 865 swish_config_test_unique_ids(c); … … 888 892 889 893 if (rc < 0) { 890 SWISH_CROAK("Error writing elem tn%s", tag);894 SWISH_CROAK("Error writing element %s", tag); 891 895 } 892 896 } … … 928 932 boolean is_alias; 929 933 write_open_tag(writer, name); 930 if (meta->alias_for == NULL) { 931 rc = xmlTextWriterWriteAttribute(writer, BAD_CAST "alias_for", 932 BAD_CAST ""); 933 is_alias = 0; 934 } 935 else { 936 rc = xmlTextWriterWriteAttribute(writer, BAD_CAST "alias_for", 937 meta->alias_for); 934 is_alias = 0; 935 rc = 0; 936 if (meta->alias_for != NULL) { 937 rc = xmlTextWriterWriteAttribute(writer, BAD_CAST "alias_for", meta->alias_for); 938 938 is_alias = 1; 939 939 } … … 943 943 944 944 if (!is_alias) { 945 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "bias", "%d", 946 meta->bias); 945 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "bias", "%d", meta->bias); 947 946 if (rc < 0) { 948 947 SWISH_CROAK("Error writing metaname bias attribute for %s", name); … … 951 950 } 952 951 953 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", 954 meta->id); 952 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", meta->id); 955 953 if (rc < 0) { 956 954 SWISH_CROAK("Error writing metaname id attribute for %s", name); … … 965 963 ) 966 964 { 967 xmlHashScan(metanames, (xmlHashScanner) write_metaname, writer);965 xmlHashScan(metanames, (xmlHashScanner)write_metaname, writer); 968 966 } 969 967 … … 988 986 boolean is_alias; 989 987 write_open_tag(writer, name); 990 if (prop->alias_for == NULL) { 991 rc = xmlTextWriterWriteAttribute(writer, BAD_CAST "alias_for", 992 BAD_CAST ""); 993 is_alias = 0; 994 } 995 else { 996 rc = xmlTextWriterWriteAttribute(writer, BAD_CAST "alias_for", 997 prop->alias_for); 988 rc = 0; 989 is_alias = 0; 990 if (prop->alias_for != NULL) { 991 rc = xmlTextWriterWriteAttribute(writer, BAD_CAST "alias_for", prop->alias_for); 998 992 is_alias = 1; 999 993 } … … 1001 995 SWISH_CROAK("Error writing property alias_for attribute for %s", name); 1002 996 } 1003 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", 1004 prop->id); 997 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", prop->id); 1005 998 if (rc < 0) { 1006 999 SWISH_CROAK("Error writing property id attribute for %s", name); … … 1009 1002 /* all other attrs are irrelevant if this is an alias */ 1010 1003 if (!is_alias) { 1011 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "ignore_case", 1012 "%d",prop->ignore_case);1004 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "ignore_case", "%d", 1005 prop->ignore_case); 1013 1006 if (rc < 0) { 1014 SWISH_CROAK("Error writing property ignore_case attribute for %s", 1015 name); 1016 } 1017 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "verbatim", 1018 "%d", prop->verbatim); 1007 SWISH_CROAK("Error writing property ignore_case attribute for %s", name); 1008 } 1009 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "verbatim", "%d", 1010 prop->verbatim); 1019 1011 if (rc < 0) { 1020 SWISH_CROAK("Error writing property verbatim attribute for %s", 1021 name); 1022 } 1023 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "type", "%d", 1024 prop->type); 1012 SWISH_CROAK("Error writing property verbatim attribute for %s", name); 1013 } 1014 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "type", "%d", prop->type); 1025 1015 if (rc < 0) { 1026 1016 SWISH_CROAK("Error writing property type attribute for %s", name); 1027 1017 } 1028 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "max", "%d", 1029 prop->max); 1018 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "max", "%d", prop->max); 1030 1019 if (rc < 0) { 1031 1020 SWISH_CROAK("Error writing property max attribute for %s", name); 1032 1021 } 1033 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "sort", "%d", 1034 prop->sort); 1022 rc = xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "sort", "%d", prop->sort); 1035 1023 if (rc < 0) { 1036 1024 SWISH_CROAK("Error writing property sort attribute for %s", name); … … 1046 1034 ) 1047 1035 { 1048 xmlHashScan(properties, (xmlHashScanner) write_property, writer);1036 xmlHashScan(properties, (xmlHashScanner)write_property, writer); 1049 1037 } 1050 1038 … … 1065 1053 ) 1066 1054 { 1067 xmlHashScan(parsers, (xmlHashScanner) write_parser, writer);1055 xmlHashScan(parsers, (xmlHashScanner)write_parser, writer); 1068 1056 } 1069 1057 … … 1076 1064 { 1077 1065 if (!swish_hash_exists((xmlHashTablePtr) things->thing1, ext) 1078 || !xmlStrEqual(swish_hash_fetch((xmlHashTablePtr) things->thing1, ext), 1079 type) 1066 || !xmlStrEqual(swish_hash_fetch((xmlHashTablePtr) things->thing1, ext), type) 1080 1067 ) { 1081 1068 … … 1083 1070 SWISH_DEBUG_MSG("writing unique MIME %s => %s", ext, type); 1084 1071 } 1085 write_element_with_content((xmlTextWriterPtr) things->thing3, ext, 1086 type); 1072 write_element_with_content((xmlTextWriterPtr) things->thing3, ext, type); 1087 1073 } 1088 1074 } … … 1097 1083 things *things; 1098 1084 things = swish_xmalloc(sizeof(things)); 1099 1100 1085 things->thing1 = swish_mime_hash(); 1101 1086 things->thing2 = mimes; 1102 1087 things->thing3 = writer; 1103 xmlHashScan(mimes, (xmlHashScanner) write_mime, things); 1088 xmlHashScan(mimes, (xmlHashScanner)write_mime, things); 1089 swish_hash_free( things->thing1 ); 1090 swish_xfree(things); 1104 1091 } 1105 1092 … … 1110 1097 ) 1111 1098 { 1112 xmlHashScan(index, (xmlHashScanner) write_hash_entry, writer);1099 xmlHashScan(index, (xmlHashScanner)write_hash_entry, writer); 1113 1100 } 1114 1101 … … 1119 1106 ) 1120 1107 { 1121 xmlHashScan(tag_aliases, (xmlHashScanner) write_hash_entry, writer);1108 xmlHashScan(tag_aliases, (xmlHashScanner)write_hash_entry, writer); 1122 1109 } 1123 1110 … … 1128 1115 ) 1129 1116 { 1130 xmlHashScan(hash, (xmlHashScanner) write_hash_entry, writer);1117 xmlHashScan(hash, (xmlHashScanner)write_hash_entry, writer); 1131 1118 } 1132 1119 … … 1142 1129 int rc; 1143 1130 xmlTextWriterPtr writer; 1131 1132 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) { 1133 swish_debug_config(config); 1134 } 1144 1135 1145 1136 /* Create a new XmlWriter for uri, with no compression. */ … … 1169 1160 1170 1161 /* Write a comment indicating a computer wrote this file */ 1171 rc = xmlTextWriterWriteComment(writer, 1172 BAD_CAST 1173 "written by libswish3 - DO NOT EDIT"); 1162 rc = xmlTextWriterWriteComment(writer, BAD_CAST "written by libswish3 - DO NOT EDIT"); 1174 1163 if (rc < 0) { 1175 1164 SWISH_CROAK("Error at xmlTextWriterWriteComment\n"); 1176 1165 } 1177 1166 1178 write_element_with_content(writer, BAD_CAST "swish_verson", 1179 BAD_CAST SWISH_VERSION); 1180 write_element_with_content(writer, BAD_CAST "swish_lib_version", 1181 BAD_CAST SWISH_LIB_VERSION); 1167 // TODO check for these in reader and croak if mismatch 1168 if (!swish_hash_exists(config->misc, BAD_CAST "swish_version")) { 1169 write_element_with_content(writer, BAD_CAST "swish_verson", 1170 BAD_CAST SWISH_VERSION); 1171 } 1172 if (!swish_hash_exists(config->misc, BAD_CAST "swish_lib_version")) { 1173 write_element_with_content(writer, BAD_CAST "swish_lib_version", 1174 BAD_CAST SWISH_LIB_VERSION); 1175 } 1182 1176 1183 1177 /* write MetaNames */ libswish3/trunk/src/libswish3/io.c
r2103 r2108 157 157 158 158 } 159 160 boolean 161 swish_file_exists( 162 xmlChar *filename 163 ) 164 { 165 struct stat info; 166 if (stat((char *)filename, &info)) { 167 return 0; 168 } 169 return 1; 170 } libswish3/trunk/src/libswish3/libswish3.h
r2106 r2108 85 85 #define SWISH_HYPERE_FORMAT "hypere" 86 86 #define SWISH_INDEX_FILEFORMAT "swish" 87 #define SWISH_HEADER_FILE "swish.xml" 87 88 88 89 /* properties */ … … 187 188 typedef struct swish_Word swish_Word; 188 189 typedef struct swish_WordList swish_WordList; 189 typedef struct swish_ParserData swish_ParserData;190 typedef struct swish_ParserData swish_ParserData; 190 191 typedef struct swish_Tag swish_Tag; 191 192 typedef struct swish_TagStack swish_TagStack; … … 398 399 xmlChar * swish_slurp_file_len( xmlChar *filename, long flen ); 399 400 xmlChar * swish_slurp_file( xmlChar *filename ); 401 boolean swish_file_exists( xmlChar *filename ); 400 402 /* 401 403 =cut … … 413 415 void * swish_hash_fetch( xmlHashTablePtr hash, xmlChar *key ); 414 416 xmlHashTablePtr swish_init_hash(int size); 417 void swish_hash_free( xmlHashTablePtr hash ); 415 418 /* 416 419 =cut … … 492 495 void swish_config_test_unique_ids( swish_Config *c ); 493 496 void swish_config_test_alias_fors( swish_Config *c ); 497 swish_ConfigFlags * swish_init_config_flags(); 498 494 499 /* 495 500 =cut … … 643 648 swish_Config * swish_read_header(char *filename); 644 649 void swish_write_header(char* filename, swish_Config* config); 650 void swish_config_test_alias_fors(swish_Config *config); 651 void swish_config_test_unique_ids(swish_Config *config); 645 652 /* 646 653 =cut libswish3/trunk/src/libswish3/parser.c
r2104 r2108 1 2 1 /* 3 2 * This file is part of libswish3 … … 485 484 */ 486 485 487 if (SWISH_DEBUG > 2)486 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 488 487 SWISH_DEBUG_MSG("startDocument()"); 489 488 … … 499 498 { 500 499 501 if (SWISH_DEBUG > 2)500 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 502 501 SWISH_DEBUG_MSG("endDocument()"); 503 502 … … 643 642 parser_data->tag = build_tag(parser_data, (xmlChar *)tag, NULL); 644 643 645 if (SWISH_DEBUG > 2)644 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 646 645 SWISH_DEBUG_MSG(" endElement(%s) (%s)", (xmlChar *)tag, 647 646 parser_data->tag); … … 744 743 ) 745 744 { 746 if (SWISH_DEBUG > 2)745 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 747 746 SWISH_DEBUG_MSG(" >> mycharacters()"); 748 747 … … 943 942 { 944 943 945 if (SWISH_DEBUG > 9)944 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 946 945 SWISH_DEBUG_MSG("init parser_data"); 947 946 … … 1024 1023 ptr->ctxt = NULL; 1025 1024 1026 if (SWISH_DEBUG > 9)1025 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1027 1026 SWISH_DEBUG_MSG("init done for parser_data"); 1028 1027 … … 1037 1036 { 1038 1037 1039 if (SWISH_DEBUG > 9)1038 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1040 1039 SWISH_DEBUG_MSG("freeing swish_ParserData"); 1041 1040 … … 1226 1225 info->ref_cnt++; 1227 1226 1228 if (SWISH_DEBUG > 5)1227 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1229 1228 SWISH_DEBUG_MSG("preparing to parse %d header lines", h->nlines); 1230 1229 … … 1235 1234 val = swish_str_skip_ws(++val); 1236 1235 1237 if (SWISH_DEBUG > 2) {1236 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 1238 1237 SWISH_DEBUG_MSG("%d parsing header line: %s", i, line); 1239 1238 … … 1640 1639 head = buf_to_head(buf); 1641 1640 1642 if (SWISH_DEBUG > 9)1641 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1643 1642 SWISH_DEBUG_MSG("number of headlines: %d", head->nlines); 1644 1643 … … 1661 1660 (*s3->parser->handler) (parser_data); 1662 1661 1663 if (SWISH_DEBUG > 1) {1662 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 1664 1663 swish_debug_docinfo(parser_data->docinfo); 1665 1664 SWISH_DEBUG_MSG(" word buffer length: %d bytes", … … 1717 1716 (*s3->parser->handler) (parser_data); 1718 1717 1719 if (SWISH_DEBUG > 1) {1718 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 1720 1719 swish_debug_docinfo(parser_data->docinfo); 1721 1720 SWISH_DEBUG_MSG(" word buffer length: %d bytes", … … 1879 1878 set_encoding(parser_data, buffer); 1880 1879 1881 if (SWISH_DEBUG > 3)1880 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1882 1881 SWISH_DEBUG_MSG("txt parser encoding: %s", 1883 1882 parser_data->docinfo->encoding); … … 1898 1897 else if (xmlStrEqual 1899 1898 (parser_data->docinfo->encoding, (xmlChar *)"unknown")) { 1900 if (SWISH_DEBUG > 3)1899 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1901 1900 SWISH_DEBUG_MSG("default env encoding -> %s", enc); 1902 1901 … … 2219 2218 swish_Tag *thistag = swish_xmalloc(sizeof(swish_Tag)); 2220 2219 2221 if (SWISH_DEBUG > 3) {2220 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2222 2221 SWISH_DEBUG_MSG(" >>>>>>> before push: tag = '%s'", tag); 2223 2222 _debug_stack(stack); … … 2249 2248 stack->flat = flatten_tag_stack(NULL, stack); 2250 2249 2251 if (SWISH_DEBUG > 3) {2250 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2252 2251 SWISH_DEBUG_MSG 2253 2252 (" >>> stack size: %d thistag count: %d current head tag = '%s'", … … 2267 2266 { 2268 2267 2269 if (SWISH_DEBUG > 3) {2268 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2270 2269 SWISH_DEBUG_MSG(" pop_tag_stack: %s from %s", stack->head->name, 2271 2270 stack->name); … … 2275 2274 2276 2275 if (stack->count > 1) { 2277 if (SWISH_DEBUG > 3) {2276 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2278 2277 SWISH_DEBUG_MSG(" >>> %d: popping '%s' from tagstack <<<", 2279 2278 stack->head->n, stack->head->name); … … 2296 2295 else { 2297 2296 2298 if (SWISH_DEBUG > 3) {2297 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2299 2298 SWISH_DEBUG_MSG 2300 2299 (" >>> %d: popping '%s' from tagstack will leave stack empty (flat: %s) <<<", … … 2324 2323 stack->flat = flatten_tag_stack(NULL, stack); 2325 2324 2326 if (SWISH_DEBUG > 3) {2325 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2327 2326 SWISH_DEBUG_MSG(" >> stack size = %d head of stack = %s <<", 2328 2327 stack->count, stack->head->name); … … 2349 2348 prev_flat = swish_xstrdup(stack->flat); 2350 2349 2351 if (SWISH_DEBUG > 3) {2350 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2352 2351 SWISH_DEBUG_MSG("pop_tag_stack_on_match() for %s", stack->name); 2353 2352 SWISH_DEBUG_MSG("comparing '%s' against '%s'", tag, stack->head->name); … … 2357 2356 if (xmlStrEqual(stack->head->name, tag)) { 2358 2357 2359 if (SWISH_DEBUG > 3) {2358 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2360 2359 SWISH_DEBUG_MSG 2361 2360 (" >>>>>>>>>>>>>>>>>>> current tag = '%s' matches top of tagstack", … … 2369 2368 if (pop_tag_stack(stack)) { 2370 2369 2371 if (SWISH_DEBUG > 3) {2370 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2372 2371 SWISH_DEBUG_MSG("stack popped. tag = %s stack->head = %s", 2373 2372 tag, stack->head->name); … … 2381 2380 */ 2382 2381 else if (stack->count) { 2383 if (SWISH_DEBUG > 3)2382 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 2384 2383 SWISH_DEBUG_MSG(" using stack->head %s", stack->head->name); 2385 2384 … … 2389 2388 } 2390 2389 else { 2391 if (SWISH_DEBUG > 3)2390 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 2392 2391 SWISH_DEBUG_MSG("no match for '%s'", tag); 2393 2392 libswish3/trunk/src/xapian/swish_xapian.cpp
r2096 r2108 18 18 */ 19 19 20 /* example Swish3 program using Xapian IR backend. 20 /* 21 example Swish3 program using Xapian IR backend. 21 22 many of the string conversion functions and the index_document() code 22 23 come nearly verbatim from the xapian-omega distribution. 23 24 24 25 */ 25 26 //#include <config.h>27 26 28 27 #include <algorithm> … … 45 44 #include <getopt.h> 46 45 47 48 46 #include <xapian.h> 49 47 … … 54 52 55 53 /* prototypes */ 56 int main(int argc, char **argv); 57 int usage(); 58 void handler(swish_ParserData * parser_data); 59 int open_writeable_index(char* dbpath); 60 int open_readable_index(char* dbpath); 61 int do_search(char* query); 54 int main( 55 int argc, 56 char **argv 57 ); 58 int usage( 59 ); 60 void handler( 61 swish_ParserData *parser_data 62 ); 63 int open_writeable_index( 64 char *dbpath 65 ); 66 int open_readable_index( 67 char *dbpath 68 ); 69 int do_search( 70 char *query 71 ); 62 72 63 73 /* global vars */ 64 static int debug = 0;65 static Xapian::WritableDatabase wdb;66 static Xapian::Database::Database rdb;74 static int debug = 0; 75 static Xapian::WritableDatabase wdb; 76 static Xapian::Database::Database rdb; 67 77 static Xapian::Stem stemmer("english"); 68 static Xapian::TermGenerator indexer; 69 static int twords = 0; 70 static int skip_duplicates = 0; 71 static int overwrite = 0; 72 static vector<bool> updated; 73 static swish_3* s3; 74 75 76 extern int SWISH_DEBUG; 77 78 static struct option longopts[] = 79 { 80 {"config", required_argument, 0, 'c'}, 81 {"debug", required_argument, 0, 'd'}, 82 {"help", no_argument, 0, 'h'}, 83 {"index", required_argument, 0, 'i'}, 84 {"skip-duplicates", no_argument, 0, 's'}, 85 {"overwrite", no_argument, 0, 'o'}, 86 {"query", required_argument, 0, 'q'}, 78 static Xapian::TermGenerator indexer; 79 static int twords = 0; 80 static int skip_duplicates = 0; 81 static int overwrite = 0; 82 static vector < bool > updated; 83 static swish_3 *s3; 84 85 extern int SWISH_DEBUG; 86 87 static struct option 88 longopts[] = { 89 {"config", required_argument, 0, 'c'}, 90 {"debug", required_argument, 0, 'd'}, 91 {"help", no_argument, 0, 'h'}, 92 {"index", required_argument, 0, 'i'}, 93 {"skip-duplicates", no_argument, 0, 's'}, 94 {"overwrite", no_argument, 0, 'o'}, 95 {"query", required_argument, 0, 'q'}, 87 96 {0, 0, 0, 0} 88 97 }; 89 90 98 91 99 // This ought to be enough for any of the conversions below. … … 108 116 109 117 int 110 string_to_int(const string &s) 118 string_to_int( 119 const string & s 120 ) 111 121 { 112 122 return atoi(s.c_str()); … … 114 124 115 125 string 116 int_to_string(int val) 126 int_to_string( 127 int val 128 ) 117 129 { 118 130 CONVERT_TO_STRING("%d") … … 120 132 121 133 string 122 long_to_string(long val) 134 long_to_string( 135 long val 136 ) 123 137 { 124 138 CONVERT_TO_STRING("%ld") … … 126 140 127 141 string 128 double_to_string(double val) 142 double_to_string( 143 double val 144 ) 129 145 { 130 146 CONVERT_TO_STRING("%f") … … 132 148 133 149 string 134 date_to_string(int y, int m, int d) 135 { 136 char buf[11]; 137 if (y < 0) y = 0; else if (y > 9999) y = 9999; 138 if (m < 1) m = 1; else if (m > 12) m = 12; 139 if (d < 1) d = 1; else if (d > 31) d = 31; 150 date_to_string( 151 int y, 152 int m, 153 int d 154 ) 155 { 156 char 157 buf[11]; 158 if (y < 0) 159 y = 0; 160 else if (y > 9999) 161 y = 9999; 162 if (m < 1) 163 m = 1; 164 else if (m > 12) 165 m = 12; 166 if (d < 1) 167 d = 1; 168 else if (d > 31) 169 d = 31; 140 170 #ifdef SNPRINTF 141 int len = SNPRINTF(buf, sizeof(buf), "%04d%02d%02d", y, m, d); 142 if (len == -1 || len > BUFSIZE) return string(buf, BUFSIZE); 171 int 172 len = SNPRINTF(buf, sizeof(buf), "%04d%02d%02d", y, m, d); 173 if (len == -1 || len > BUFSIZE) 174 return string(buf, BUFSIZE); 143 175 return string(buf, len); 144 176 #else 145 177 buf[sizeof(buf) - 1] = '\0'; 146 178 sprintf(buf, "%04d%02d%02d", y, m, d); 147 if (buf[sizeof(buf) - 1]) abort(); /* Uh-oh, buffer overrun */ 179 if (buf[sizeof(buf) - 1]) 180 abort(); /* Uh-oh, buffer overrun */ 148 181 return string(buf); 149 182 #endif 150 183 } 151 184 152 inline uint32_t binary_string_to_int(const std::string &s) 153 { 154 if (s.size() != 4) return (uint32_t)-1; 155 uint32_t v; 185 inline uint32_t 186 binary_string_to_int( 187 const std::string & s 188 ) 189 { 190 if (s.size() != 4) 191 return (uint32_t) - 1; 192 uint32_t 193 v; 156 194 memcpy(&v, s.data(), 4); 157 195 return ntohl(v); 158 196 } 159 197 160 inline std::string int_to_binary_string(uint32_t v) 198 inline 199 std::string 200 int_to_binary_string( 201 uint32_t v 202 ) 161 203 { 162 204 v = htonl(v); 163 return std::string(reinterpret_cast <const char*>(&v), 4);205 return std::string(reinterpret_cast < const char *>(&v), 4); 164 206 } 165 207 166 208 static string 167 get_prefix(xmlChar* metaname, swish_Config* config) 209 get_prefix( 210 xmlChar *metaname, 211 swish_Config *config 212 ) 168 213 { 169 214 string prefix; 170 swish_MetaName *meta = (swish_MetaName*)swish_hash_fetch(config->metanames, metaname); 215 swish_MetaName *meta = 216 (swish_MetaName *)swish_hash_fetch(config->metanames, metaname); 171 217 prefix = int_to_string(meta->id); 172 218 return prefix; … … 174 220 175 221 static unsigned int 176 get_weight(xmlChar* metaname, swish_Config* config) 222 get_weight( 223 xmlChar *metaname, 224 swish_Config *config 225 ) 177 226 { 178 227 unsigned int w; 179 swish_MetaName *meta = (swish_MetaName*)swish_hash_fetch(config->metanames, metaname); 180 return meta->bias > 0 ? meta->bias : 1; // TODO need to account for negative values. 228 swish_MetaName *meta = 229 (swish_MetaName *)swish_hash_fetch(config->metanames, metaname); 230 return meta->bias > 0 ? meta->bias : 1; // TODO need to account for negative values. 181 231 } 182 232 183 233 static void 184 add_metanames(xmlBufferPtr buffer, void* config, xmlChar* metaname) 234 add_metanames( 235 xmlBufferPtr buffer, 236 void *config, 237 xmlChar *metaname 238 ) 185 239 { 186 240 // lookup weight and prefix 187 string prefix = get_prefix(metaname, (swish_Config*)config);188 unsigned int weight = get_weight(metaname, (swish_Config *)config);189 indexer.index_text((const char *)xmlBufferContent(buffer), weight, prefix);241 string prefix = get_prefix(metaname, (swish_Config *)config); 242 unsigned int weight = get_weight(metaname, (swish_Config *)config); 243 indexer.index_text((const char *)xmlBufferContent(buffer), weight, prefix); 190 244 } 191 245 192 246 static void 193 add_properties(xmlBufferPtr buffer, Xapian::Document doc, xmlChar* name) 194 { 195 swish_Property* prop; 196 prop = (swish_Property*)swish_hash_fetch(s3->config->properties, name); 197 SWISH_DEBUG_MSG("adding property %s [%d]: %s", name, prop->id, xmlBufferContent(buffer)); 198 doc.add_value(prop->id, (const char*)xmlBufferContent(buffer)); 199 } 200 201 void 202 handler(swish_ParserData * parser_data) 203 { 204 printf("nwords: %d\n", parser_data->docinfo->nwords); 205 247 add_properties( 248 xmlBufferPtr buffer, 249 Xapian::Document doc, 250 xmlChar *name 251 ) 252 { 253 swish_Property *prop; 254 prop = (swish_Property *)swish_hash_fetch(s3->config->properties, name); 255 //SWISH_DEBUG_MSG("adding property %s [%d]: %s", name, prop->id, 256 // xmlBufferContent(buffer)); 257 doc.add_value(prop->id, (const char *)xmlBufferContent(buffer)); 258 } 259 260 void 261 handler( 262 swish_ParserData *parser_data 263 ) 264 { 265 //printf("nwords: %d\n", parser_data->docinfo->nwords); 266
