Changeset 2096
- Timestamp: 03/21/08 14:27:54 (2 months ago)
- Files:
- libswish3/trunk/libswish3-config.in (modified) (2 diffs)
- libswish3/trunk/src/libswish3/analyzer.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/config.c (modified) (10 diffs)
- libswish3/trunk/src/libswish3/hash.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/header.c (modified) (9 diffs)
- libswish3/trunk/src/libswish3/libswish3.h (modified) (4 diffs)
- libswish3/trunk/src/libswish3/namedbuffer.c (modified) (3 diffs)
- libswish3/trunk/src/libswish3/parser.c (modified) (10 diffs)
- libswish3/trunk/src/libswish3/string.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/swish.c (modified) (1 diff)
- libswish3/trunk/src/libswish3/words.c (modified) (27 diffs)
- libswish3/trunk/src/swish_lint.c (modified) (1 diff)
- libswish3/trunk/src/xapian/swish_xapian.cpp (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/libswish3-config.in
r1923 r2096 68 68 69 69 --cflags) 70 echo @CFLAGS@ -I${includedir} @Z_CFLAGS@70 echo @CFLAGS@ -I${includedir} 71 71 ;; 72 72 … … 76 76 77 77 --libs) 78 echo -L${libdir} -lswish3 @ Z_LIBS@ @LIBS@78 echo -L${libdir} -lswish3 @LIBS@ 79 79 ;; 80 80 libswish3/trunk/src/libswish3/analyzer.c
r2041 r2096 69 69 SWISH_WARN("analyzer ref_cnt != 0: %d\n", a->ref_cnt); 70 70 } 71 if (SWISH_DEBUG >=SWISH_DEBUG_MEMORY) {71 if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) { 72 72 SWISH_DEBUG_MSG("free analyzer"); 73 73 } libswish3/trunk/src/libswish3/config.c
r2046 r2096 24 24 */ 25 25 26 #include <libxml/xmlstring.h>27 26 #include <sys/param.h> 28 27 #include <stdio.h> … … 46 45 free_string(xmlChar *payload, xmlChar * key) 47 46 { 48 if (SWISH_DEBUG >=SWISH_DEBUG_CONFIG)47 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) 49 48 SWISH_DEBUG_MSG(" freeing config %s => %s", key, payload); 50 49 … … 55 54 free_props(swish_Property *prop, xmlChar *propname) 56 55 { 57 if (SWISH_DEBUG >=SWISH_DEBUG_CONFIG) {56 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) { 58 57 SWISH_DEBUG_MSG(" freeing config->prop %s", propname); 59 58 swish_debug_property((swish_Property*)prop); … … 68 67 free_metas(swish_MetaName *meta, xmlChar *metaname) 69 68 { 70 if (SWISH_DEBUG >=SWISH_DEBUG_CONFIG) {69 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) { 71 70 SWISH_DEBUG_MSG(" freeing config->meta %s", metaname); 72 71 swish_debug_metaname((swish_MetaName*)meta); … … 81 80 swish_free_config(swish_Config * config) 82 81 { 83 if (SWISH_DEBUG >=SWISH_DEBUG_CONFIG)82 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) 84 83 { 85 84 SWISH_DEBUG_MSG("freeing config"); … … 139 138 swish_MetaName *tmpmeta; 140 139 141 if (SWISH_DEBUG >=SWISH_DEBUG_CONFIG)140 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) 142 141 SWISH_DEBUG_MSG("setting default config"); 143 142 … … 159 158 ); 160 159 161 /* increm ref counts after they've been stashed. a little awkward, but saves var names... */ 160 /* alter swish_MetaName objects after they've been stashed. 161 a little awkward, but saves var names. 
162 */ 162 163 tmpmeta = xmlHashLookup(config->metanames, (xmlChar*)SWISH_DEFAULT_METANAME); 163 164 tmpmeta->ref_cnt++; 165 tmpmeta->id = SWISH_META_DEFAULT_ID; 164 166 tmpmeta = xmlHashLookup(config->metanames, (xmlChar*)SWISH_TITLE_METANAME); 165 167 tmpmeta->ref_cnt++; 168 tmpmeta->id = SWISH_META_TITLE_ID; 166 169 167 170 … … 215 218 tmpprop = xmlHashLookup(config->properties, (xmlChar*)SWISH_PROP_DESCRIPTION); 216 219 tmpprop->ref_cnt++; 220 tmpprop->id = SWISH_PROP_DESCRIPTION_ID; 217 221 tmpprop = xmlHashLookup(config->properties, (xmlChar*)SWISH_PROP_TITLE); 218 222 tmpprop->ref_cnt++; 223 tmpprop->id = SWISH_PROP_TITLE_ID; 219 224 220 225 … … 233 238 config->flags->tokenize = 1; 234 239 235 if (SWISH_DEBUG >=SWISH_DEBUG_CONFIG) {240 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) { 236 241 SWISH_DEBUG_MSG("config_set_default done"); 237 242 swish_debug_config(config); … … 245 250 246 251 config = swish_parse_config(conf, config); 247 if (SWISH_DEBUG >=SWISH_DEBUG_CONFIG)252 if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) 248 253 swish_debug_config(config); 249 254 libswish3/trunk/src/libswish3/hash.c
r2090 r2096 21 21 /* wrappers to common functions in libxml2 hash */ 22 22 23 #include <libxml/hash.h>24 23 #include <stdlib.h> 25 24 libswish3/trunk/src/libswish3/header.c
r2046 r2096 20 20 /* read/write the swish.xml header file */ 21 21 22 #include <libxml/xmlstring.h>23 22 #include <libxml/xmlreader.h> 24 23 #include "libswish3.h" … … 37 36 swish_Config *config; 38 37 boolean is_valid; 38 unsigned int prop_id; 39 unsigned int meta_id; 39 40 } headmaker; 40 41 … … 92 93 newmeta = swish_init_metaname( newname ); 93 94 newmeta->ref_cnt++; 95 newmeta->id = h->meta_id++; 94 96 swish_hash_add( h->config->metanames, newmeta->name, newmeta ); 95 97 } … … 123 125 meta->bias = (boolean)strtol((char*)attr_val, (char**)NULL, 10); 124 126 } 127 else if (xmlStrEqual(attr, (xmlChar*)"id")) { 128 meta->id = (int)strtol((char*)attr_val, (char**)NULL, 10); 129 } 125 130 else if (xmlStrEqual(attr, (xmlChar*)"alias")) { 126 131 do_metaname_aliases( (xmlChar*)attr_val, h, meta ); … … 167 172 ); 168 173 } 169 170 } 174 175 } 176 177 // must have an id 178 if (!meta->id) { 179 meta->id = h->meta_id++; 180 } 181 171 182 172 183 if (!swish_hash_exists( h->config->metanames, meta->name )) { … … 210 221 newprop->ref_cnt++; 211 222 newprop->alias_for = swish_xstrdup( prop->name ); 223 newprop->id = h->prop_id++; 212 224 swish_hash_add( h->config->properties, newprop->name, newprop ); 213 225 //swish_debug_property(newprop); … … 245 257 else if (xmlStrEqual(attr, (xmlChar*)"sort")) { 246 258 prop->sort = (boolean)strtol((char*)attr_val, (char**)NULL, 10); 259 } 260 else if (xmlStrEqual(attr, (xmlChar*)"id")) { 261 prop->id = (boolean)strtol((char*)attr_val, (char**)NULL, 10); 247 262 } 248 263 else if (xmlStrEqual(attr, (xmlChar*)"type")) { … … 303 318 } 304 319 320 } 321 322 if (!prop->id) { 323 prop->id = h->prop_id++; 305 324 } 306 325 … … 524 543 h->ismetas = 0; 525 544 h->parent_name = NULL; 545 h->prop_id = SWISH_PROP_THIS_MUST_COME_LAST_ID; 546 h->meta_id = SWISH_META_THIS_MUST_COME_LAST_ID; 526 547 return h; 527 548 } libswish3/trunk/src/libswish3/libswish3.h
r2090 r2096 27 27 #include <libxml/parser.h> 28 28 #include <libxml/hash.h> 29 29 #include <libxml/xmlstring.h> 30 30 31 31 #define SWISH_LIB_VERSION "0.1.0" … … 102 102 #define SWISH_META_CONNECTOR "\3" 103 103 104 /* built-in id values */ 105 typedef enum { 106 SWISH_META_DEFAULT_ID = 0, 107 SWISH_META_TITLE_ID, 108 SWISH_META_THIS_MUST_COME_LAST_ID 109 } SWISH_META_ID; 110 111 typedef enum { 112 SWISH_PROP_DOCID_ID = 0, 113 SWISH_PROP_DOCPATH_ID, 114 SWISH_PROP_DBFILE_ID, 115 SWISH_PROP_TITLE_ID, 116 SWISH_PROP_SIZE_ID, 117 SWISH_PROP_MTIME_ID, 118 SWISH_PROP_DESCRIPTION_ID, 119 SWISH_PROP_NWORDS_ID, 120 SWISH_PROP_MIME_ID, 121 SWISH_PROP_PARSER_ID, 122 SWISH_PROP_THIS_MUST_COME_LAST_ID 123 } SWISH_PROP_ID; 124 125 /* xapian (maybe others) need string prefixes for metanames */ 126 #define SWISH_PREFIX_URL "U" 127 #define SWISH_PREFIX_MTIME "T" 128 129 104 130 /* utils */ 105 131 #define SWISH_MAX_WORD_LEN 256 … … 122 148 123 149 /* debugging levels */ 124 #define SWISH_DEBUG_DOCINFO 1 125 #define SWISH_DEBUG_TOKENIZER 2 126 #define SWISH_DEBUG_WORDLIST 4 127 #define SWISH_DEBUG_PARSER 8 128 #define SWISH_DEBUG_CONFIG 16 129 #define SWISH_DEBUG_MEMORY 32 130 #define SWISH_DEBUG_NAMEDBUFFER 64 150 typedef enum { 151 SWISH_DEBUG_DOCINFO = 1, 152 SWISH_DEBUG_TOKENIZER = 2, 153 SWISH_DEBUG_WORDLIST = 4, 154 SWISH_DEBUG_PARSER = 8, 155 SWISH_DEBUG_CONFIG = 16, 156 SWISH_DEBUG_MEMORY = 32, 157 SWISH_DEBUG_NAMEDBUFFER = 64 158 } SWISH_DEBUG_LEVELS; 131 159 132 160 #define SWISH_DEBUG_MSG(args...) \ … … 578 606 int autovivify); 579 607 void swish_append_buffer( xmlBufferPtr buf, xmlChar * txt, int len ); 608 xmlChar* swish_nb_get_value( swish_NamedBuffer* nb, xmlChar* key ); 580 609 /* 581 610 =cut libswish3/trunk/src/libswish3/namedbuffer.c
r2041 r2096 23 23 24 24 25 #include <libxml/hash.h>26 #include <libxml/xmlstring.h>27 25 #include <stdio.h> 28 26 #include <stdlib.h> … … 58 56 free_name_from_hash(void *buffer, xmlChar * name) 59 57 { 60 if (SWISH_DEBUG >=SWISH_DEBUG_NAMEDBUFFER)58 if (SWISH_DEBUG & SWISH_DEBUG_NAMEDBUFFER) 61 59 SWISH_DEBUG_MSG(" freeing NamedBuffer %s\n", name); 62 60 … … 195 193 } 196 194 195 xmlChar* 196 swish_nb_get_value( swish_NamedBuffer *nb, xmlChar *key ) 197 { 198 xmlBufferPtr buf; 199 buf = xmlHashLookup(nb->hash, key); 200 return (xmlChar*)xmlBufferContent(buf); 201 } libswish3/trunk/src/libswish3/parser.c
r2041 r2096 248 248 if (metaname != NULL && metacontent != NULL) 249 249 { 250 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)250 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 251 251 SWISH_DEBUG_MSG("found HTML meta: %s => %s", metaname, metacontent); 252 252 … … 274 274 275 275 276 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)276 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 277 277 { 278 278 fprintf(stderr, " >>> build_tag (%s (%s) ", tag, parser_data->tag); … … 311 311 swish_TagStack *s = parser_data->metastack; 312 312 313 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)313 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 314 314 SWISH_DEBUG_MSG("buffer is >>%s<< before flush, word_pos = %d", 315 315 xmlBufferContent(parser_data->meta_buf), parser_data->word_pos); … … 442 442 443 443 444 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)444 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 445 445 SWISH_DEBUG_MSG("checking config for '%s' in watched tags", parser_data->tag); 446 446 … … 449 449 if (swish_hash_exists(parser_data->s3->config->properties, parser_data->tag)) 450 450 { 451 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)451 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 452 452 SWISH_DEBUG_MSG(" %s = new property", parser_data->tag); 453 453 … … 457 457 parser_data->propstack = push_tag_stack(parser_data->propstack, parser_data->tag); 458 458 459 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)459 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 460 460 SWISH_DEBUG_MSG("%s pushed ok unto propstack", parser_data->tag); 461 461 } … … 464 464 if (swish_hash_exists(parser_data->s3->config->metanames, parser_data->tag)) 465 465 { 466 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)466 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 467 467 SWISH_DEBUG_MSG(" %s = new metaname", parser_data->tag); 468 468 … … 472 472 } 473 473 474 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)474 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 475 475 SWISH_DEBUG_MSG("config check for '%s' done", parser_data->tag); 476 476 … … 1671 1671 (xmlChar*)SWISH_DEFAULT_METANAME); 1672 1672 1673 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)1673 if 
(SWISH_DEBUG & SWISH_DEBUG_PARSER) 1674 1674 SWISH_DEBUG_MSG("stack pushed for %s", parser_data->metastack->flat); 1675 1675 … … 1679 1679 if (out != NULL) 1680 1680 { 1681 if (SWISH_DEBUG >=SWISH_DEBUG_PARSER)1681 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1682 1682 SWISH_DEBUG_MSG("tmp text buffer being freed"); 1683 1683 libswish3/trunk/src/libswish3/string.c
r2090 r2096 24 24 25 25 #include <assert.h> 26 #include <libxml/hash.h>27 26 #include <wchar.h> 28 27 #include <ctype.h> libswish3/trunk/src/libswish3/swish.c
r2046 r2096 81 81 82 82 /* global debug flag */ 83 setenv("SWISH_DEBUG", "0", 0);84 setenv("SWISH_DEBUG_MEMORY", "0", 0);85 setenv("SWISH_DEBUG_CONFIG", "0", 0);86 setenv("SWISH_DEBUG_DOCINFO", "0", 0);87 setenv("SWISH_DEBUG_WORDLIST", "0", 0);83 setenv("SWISH_DEBUG", "0", 0); 84 setenv("SWISH_DEBUG_MEMORY", "0", 0); 85 setenv("SWISH_DEBUG_CONFIG", "0", 0); 86 setenv("SWISH_DEBUG_DOCINFO", "0", 0); 87 setenv("SWISH_DEBUG_WORDLIST", "0", 0); 88 88 setenv("SWISH_DEBUG_TOKENIZER", "0", 0); 89 setenv("SWISH_DEBUG_PARSER", "0", 0);89 setenv("SWISH_DEBUG_PARSER", "0", 0); 90 90 setenv("SWISH_DEBUG_NAMEDBUFFER", "0", 0); 91 91 if (!SWISH_DEBUG) { 92 92 93 SWISH_DEBUG += (int)strtol(getenv("SWISH_DEBUG"), (char**)NULL, 10); 94 95 /* additional env vars just increase the global var value */ 96 93 97 if ((int)strtol(getenv("SWISH_DEBUG_MEMORY"), (char**)NULL, 10)) { 94 98 SWISH_DEBUG += SWISH_DEBUG_MEMORY; 95 99 } 96 SWISH_DEBUG += (int)strtol(getenv("SWISH_DEBUG_CONFIG"), (char**)NULL, 10); 97 SWISH_DEBUG += (int)strtol(getenv("SWISH_DEBUG_DOCINFO"), (char**)NULL, 10); 98 SWISH_DEBUG += (int)strtol(getenv("SWISH_DEBUG_WORDLIST"), (char**)NULL, 10); 99 SWISH_DEBUG += (int)strtol(getenv("SWISH_DEBUG_PARSER"), (char**)NULL, 10); 100 SWISH_DEBUG += (int)strtol(getenv("SWISH_DEBUG_NAMEDBUFFER"), (char**)NULL, 10); 100 if ((int)strtol(getenv("SWISH_DEBUG_CONFIG"), (char**)NULL, 10)) { 101 SWISH_DEBUG += SWISH_DEBUG_CONFIG; 102 } 103 if ((int)strtol(getenv("SWISH_DEBUG_DOCINFO"), (char**)NULL, 10)) { 104 SWISH_DEBUG += SWISH_DEBUG_DOCINFO; 105 } 106 if ((int)strtol(getenv("SWISH_DEBUG_WORDLIST"), (char**)NULL, 10)) { 107 SWISH_DEBUG += SWISH_DEBUG_WORDLIST; 108 } 109 if ((int)strtol(getenv("SWISH_DEBUG_PARSER"), (char**)NULL, 10)) { 110 SWISH_DEBUG += SWISH_DEBUG_PARSER; 111 } 112 if ((int)strtol(getenv("SWISH_DEBUG_NAMEDBUFFER"), (char**)NULL, 10)) { 113 SWISH_DEBUG += SWISH_DEBUG_NAMEDBUFFER; 114 } 101 115 } 102 116 libswish3/trunk/src/libswish3/words.c
r2030 r2096 84 84 swish_Word *t; 85 85 86 if (SWISH_DEBUG >=SWISH_DEBUG_MEMORY)86 if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) 87 87 SWISH_DEBUG_MSG("freeing swish_WordList"); 88 88 … … 91 91 while (list->current != NULL) 92 92 { 93 if (SWISH_DEBUG >=SWISH_DEBUG_MEMORY)93 if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) 94 94 SWISH_DEBUG_MSG("free metaname: %s", list->current->metaname); 95 95 96 96 swish_xfree(list->current->metaname); 97 97 98 if (SWISH_DEBUG >=SWISH_DEBUG_MEMORY)98 if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) 99 99 SWISH_DEBUG_MSG("free context: %s", list->current->context); 100 100 101 101 swish_xfree(list->current->context); 102 102 103 if (SWISH_DEBUG >=SWISH_DEBUG_MEMORY)103 if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) 104 104 SWISH_DEBUG_MSG("free word: %s", list->current->word); 105 105 106 106 swish_xfree(list->current->word); 107 107 108 if (SWISH_DEBUG >=SWISH_DEBUG_MEMORY)108 if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) 109 109 SWISH_DEBUG_MSG("free Word struct"); 110 110 … … 114 114 } 115 115 116 if (SWISH_DEBUG >=SWISH_DEBUG_MEMORY)116 if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) 117 117 SWISH_DEBUG_MSG("reset nwords"); 118 118 119 119 list->nwords = 0; 120 120 121 if (SWISH_DEBUG >=SWISH_DEBUG_MEMORY)121 if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) 122 122 SWISH_DEBUG_MSG("free list"); 123 123 … … 263 263 } 264 264 265 if( SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER )265 if( SWISH_DEBUG & SWISH_DEBUG_TOKENIZER ) 266 266 SWISH_DEBUG_MSG(" %lc is %d bytes long", ch, len); 267 267 … … 304 304 in_word = 0; 305 305 306 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)306 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 307 307 SWISH_DEBUG_MSG("parsing string: '%ls' into words", wide); 308 308 … … 313 313 byte_count += bytes_in_chr((wint_t)c); 314 314 315 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)315 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 316 316 SWISH_DEBUG_MSG(" wchar: %lc lower: %lc int: %d %#x\n orig: %lc %ld %#lx (next is %lc)", 317 317 (wint_t) wide[i], … … 337 337 if (in_word) 338 338 { 339 if (SWISH_DEBUG 
>=SWISH_DEBUG_TOKENIZER)339 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 340 340 SWISH_DEBUG_MSG("found end of token: '%lc'", (wint_t)c); 341 341 … … 359 359 else 360 360 { 361 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)361 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 362 362 SWISH_DEBUG_MSG("skipping token '%s' -- too short: %d", utf8_str, wl); 363 363 } … … 370 370 else 371 371 { 372 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)372 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 373 373 SWISH_DEBUG_MSG("ignoring char '%lc'", (wint_t)c); 374 374 … … 383 383 { 384 384 385 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)385 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 386 386 SWISH_DEBUG_MSG("adding to token: '%lc'", (wint_t)c); 387 387 … … 392 392 { 393 393 394 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)394 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 395 395 SWISH_DEBUG_MSG("forcing end of token: '%lc'", (wint_t)c); 396 396 … … 414 414 else 415 415 { 416 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)416 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 417 417 SWISH_DEBUG_MSG("skipping token '%ls' -- too short: %d", word, wl); 418 418 } … … 428 428 { 429 429 430 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)430 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 431 431 SWISH_DEBUG_MSG("start a token with '%lc'", (wint_t)c); 432 432 … … 522 522 in_word = 0; 523 523 524 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)524 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 525 525 SWISH_DEBUG_MSG("tokenizing string: '%s'", str); 526 526 … … 532 532 byte_count++; 533 533 534 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)534 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 535 535 SWISH_DEBUG_MSG(" char: %c lower: %c int: %d %#x (next is %c)", 536 536 str[i], … … 553 553 if (in_word) 554 554 { 555 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)555 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 556 556 SWISH_DEBUG_MSG("found end of token: '%c' at %d", c, byte_count); 557 557 … … 574 574 else 575 575 { 576 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)576 if (SWISH_DEBUG & 
SWISH_DEBUG_TOKENIZER) 577 577 SWISH_DEBUG_MSG("skipping token '%s' -- too short: %d", word, wl); 578 578 } … … 583 583 else 584 584 { 585 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)585 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 586 586 SWISH_DEBUG_MSG("ignoring char '%c'", c); 587 587 … … 596 596 { 597 597 598 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)598 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 599 599 SWISH_DEBUG_MSG("adding to token: '%c' %d", c, byte_count); 600 600 … … 605 605 { 606 606 607 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)607 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 608 608 SWISH_DEBUG_MSG("forcing end of token: '%c' %d", c, byte_count); 609 609 … … 626 626 else 627 627 { 628 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)628 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 629 629 SWISH_DEBUG_MSG("skipping token '%s' -- too short: %d", word, wl); 630 630 } … … 638 638 { 639 639 640 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)640 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 641 641 SWISH_DEBUG_MSG("start a token with '%c' %d", c, byte_count); 642 642 … … 713 713 end = 0; 714 714 715 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)715 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 716 716 SWISH_DEBUG_MSG("Before: %ls", word); 717 717 … … 764 764 } 765 765 766 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)766 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 767 767 SWISH_DEBUG_MSG("After: %ls (stripped %d start chars, %d end chars)", word, start, end); 768 768 … … 778 778 end = 0; 779 779 780 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)780 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 781 781 SWISH_DEBUG_MSG("Before: %s", word); 782 782 … … 829 829 } 830 830 831 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)831 if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER) 832 832 SWISH_DEBUG_MSG("After: %s (stripped %d start chars, %d end chars)", word, start, end); 833 833 … … 853 853 swish_Word *thisword = (swish_Word *) swish_xmalloc(sizeof(swish_Word)); 854 854 855 if (SWISH_DEBUG >=SWISH_DEBUG_TOKENIZER)855 if (SWISH_DEBUG & 
SWISH_DEBUG_TOKENIZER) 856 856 { 857 857 SWISH_DEBUG_MSG(" >>>>>>>>swish_Word<<<<<<<<: %s", word); libswish3/trunk/src/swish_lint.c
r2027 r2096 27 27 #include <wctype.h> 28 28 #include <ctype.h> 29 #include <libxml/hash.h>30 29 #include <getopt.h> 31 30 libswish3/trunk/src/xapian/swish_xapian.cpp
r2090 r2096 18 18 */ 19 19 20 /* example Swish3 program using Xapian IR backend */ 20 /* example Swish3 program using Xapian IR backend. 21 many of the string conversion functions and the index_document() code 22 come nearly verbatim from the xapian-omega distribution. 23 24 */ 21 25 22 26 //#include <config.h> … … 84 88 }; 85 89 86 87 #define SWISH_PREFIX_URL "U"88 #define SWISH_PREFIX_MTIME "T"89 #define SWISH_PROP_LAST_MOD 090 90 91 91 // This ought to be enough for any of the conversions below. … … 195 195 swish_Property* prop; 196 196 prop = (swish_Property*)swish_hash_fetch(s3->config->properties, name); 197 SWISH_DEBUG_MSG("adding property %s [%d]: %s", name, prop->id, xmlBufferContent(buffer)); 197 198 doc.add_value(prop->id, (const char*)xmlBufferContent(buffer)); 198 199 } … … 216 217 // Put the data in the document 217 218 Xapian::Document newdocument; 219 xmlChar* title = (xmlChar*)swish_nb_get_value(parser_data->properties, (xmlChar*)SWISH_PROP_TITLE); 220 printf("title = %s", (char*)title); 218 221 string unique_id = SWISH_PREFIX_URL + string((const char*)parser_data->docinfo->uri); 219 222 string record = "url=" + string( (const char*)parser_data->docinfo->uri ); 220 record += "\ntitle=" + string((const char*) 221 swish_hash_fetch(parser_data->properties->hash, (xmlChar*)SWISH_PROP_TITLE)); 223 record += "\ntitle=" + string((const char*)title); 222 224 record += "\ntype=" + string( (const char*)parser_data->docinfo->mime ); 223 225 record += "\nmodtime=" + long_to_string(parser_data->docinfo->mtime); … … 228 230 indexer.set_document(newdocument); 229 231 indexer.increase_termpos(100); 230 newdocument.add_term(SWISH_PREFIX_MTIME + string((const char*)parser_data->docinfo->mime));232 newdocument.add_term(SWISH_PREFIX_MTIME + long_to_string(parser_data->docinfo->mtime)); 231 233 newdocument.add_term(unique_id); 232 234 … … 241 243 newdocument.add_term(date_term); // Year (YYYY) 242 244 243 // Add last_mod as a value to allow "sort by date". 
244 newdocument.add_value(SWISH_PROP_LAST_MOD, 245 int_to_binary_string((uint32_t)parser_data->docinfo->mtime)); 245 // add all docinfo values as properties 246 newdocument.add_value(SWISH_PROP_MTIME_ID, long_to_string(parser_data->docinfo->mtime)); 247 newdocument.add_value(SWISH_PROP_DOCPATH_ID, string((const char*)parser_data->docinfo->uri)); 248 newdocument.add_value(SWISH_PROP_SIZE_ID, long_to_string(parser_data->docinfo->size)); 249 newdocument.add_value(SWISH_PROP_MIME_ID, string((const char*)parser_data->docinfo->mime)); 250 newdocument.add_value(SWISH_PROP_PARSER_ID, string((const char*)parser_data->docinfo->parser)); 251 newdocument.add_value(SWISH_PROP_NWORDS_ID, long_to_string(parser_data->docinfo->nwords)); 252 253 // title is special value 254 newdocument.add_value(SWISH_PROP_TITLE_ID, string((const char*)title)); 246 255 247 256 // add all metanames and properties
