Changeset 2009 for libswish3/trunk/src
- Timestamp: 02/03/08 23:29:35 (10 months ago)
- Files:
  - libswish3/trunk/src/libswish3/libswish3.h (modified) (7 diffs)
  - libswish3/trunk/src/libswish3/parser.c (modified) (11 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/src/libswish3/libswish3.h
r1955 r2009 50 50 51 51 /* default config hash key names */ 52 #define SWISH_INCLUDE_FILE "IncludeConfigFile"53 #define SWISH_PROP "PropertyNames"54 #define SWISH_PROP_ASIS "nostripchars"55 #define SWISH_PROP_MAX "PropertyNamesMaxLength"56 #define SWISH_PROP_SORT "PropertyNamesSortKeyLength"57 #define SWISH_META "MetaNames"58 #define SWISH_MIME "MIME"59 #define SWISH_PARSERS "Parsers"60 #define SWISH_INDEX "Index"61 #define SWISH_ALIAS "TagAlias"62 #define SWISH_WORDS "Words"63 #define SWISH_DEFAULT_PARSER "default"64 #define SWISH_PARSER_TXT "TXT"65 #define SWISH_PARSER_XML "XML"66 #define SWISH_PARSER_HTML "HTML"67 #define SWISH_DEFAULT_PARSER_TYPE "HTML"68 #define SWISH_INDEX_FORMAT "Format"69 #define SWISH_INDEX_NAME "Name"70 #define SWISH_INDEX_LOCALE "Locale"71 #define SWISH_DEFAULT_VALUE "1"72 #define SWISH_PARSE_WORDS "Tokenize"52 #define SWISH_INCLUDE_FILE "IncludeConfigFile" 53 #define SWISH_PROP "PropertyNames" 54 #define SWISH_PROP_ASIS "nostripchars" 55 #define SWISH_PROP_MAX "PropertyNamesMaxLength" 56 #define SWISH_PROP_SORT "PropertyNamesSortKeyLength" 57 #define SWISH_META "MetaNames" 58 #define SWISH_MIME "MIME" 59 #define SWISH_PARSERS "Parsers" 60 #define SWISH_INDEX "Index" 61 #define SWISH_ALIAS "TagAlias" 62 #define SWISH_WORDS "Words" 63 #define SWISH_DEFAULT_PARSER "default" 64 #define SWISH_PARSER_TXT "TXT" 65 #define SWISH_PARSER_XML "XML" 66 #define SWISH_PARSER_HTML "HTML" 67 #define SWISH_DEFAULT_PARSER_TYPE "HTML" 68 #define SWISH_INDEX_FORMAT "Format" 69 #define SWISH_INDEX_NAME "Name" 70 #define SWISH_INDEX_LOCALE "Locale" 71 #define SWISH_DEFAULT_VALUE "1" 72 #define SWISH_PARSE_WORDS "Tokenize" 73 73 74 74 /* tags */ … … 154 154 typedef struct swish_MetaStackElement *swish_MetaStackElementPtr; 155 155 typedef struct swish_MetaStack swish_MetaStack; 156 typedef struct swish_MetaName swish_MetaName; 157 typedef struct swish_Property swish_Property; 156 158 typedef struct swish_Word swish_Word; 157 159 typedef struct swish_WordList 
swish_WordList; … … 176 178 struct swish_Config 177 179 { 178 int ref_cnt; /* for scripting languages */179 void *stash; /* for scripting languages */180 int ref_cnt; /* for bindings */ 181 void *stash; /* for bindings */ 180 182 xmlHashTablePtr conf; /* the meat */ 181 183 struct swish_ConfigFlags *flags; /* shortcuts for parsing */ … … 200 202 struct swish_NamedBuffer 201 203 { 202 int ref_cnt; /* for scripting languages */203 void *stash; /* for scripting languages */204 int ref_cnt; /* for bindings */ 205 void *stash; /* for bindings */ 204 206 xmlHashTablePtr hash; /* the meat */ 205 207 }; … … 218 220 }; 219 221 222 struct swish_MetaName 223 { 224 unsigned int id; 225 xmlChar *name; 226 int bias; 227 }; 228 229 struct swish_Property 230 { 231 unsigned int id; 232 xmlChar *name; 233 }; 220 234 221 235 struct swish_Word 222 236 { 223 unsigned int position; // word position in doc224 xmlChar *metaname; // immediate metaname225 xmlChar *context; // metaname ancestry226 xmlChar *word; // the word itself (NOTE stored as multibyte not wchar)227 unsigned int start_offset; // start byte228 unsigned int end_offset; // end byte237 unsigned int position; // word position in doc 238 xmlChar *metaname; // immediate metaname 239 xmlChar *context; // metaname ancestry 240 xmlChar *word; // the word itself (NOTE stored as multibyte not wchar) 241 unsigned int start_offset; // start byte 242 unsigned int end_offset; // end byte 229 243 struct swish_Word *next; // pointer to next swish_Word 230 244 struct swish_Word *prev; // pointer to prev swish_Word … … 233 247 struct swish_WordList 234 248 { 235 swish_Word *head;236 swish_Word *tail;237 swish_Word *current; // for iterating238 unsigned int nwords;239 unsigned int ref_cnt; // for scripting languages249 swish_Word *head; 250 swish_Word *tail; 251 swish_Word *current; // for iterating 252 unsigned int nwords; 253 unsigned int ref_cnt; // for bindings 240 254 }; 241 255 … … 281 295 struct swish_ParseData 282 296 { 283 
xmlBufferPtr buf_ptr; // tmp text (MetaName)buffer297 xmlBufferPtr meta_buf; // tmp MetaName buffer 284 298 xmlBufferPtr prop_buf; // tmp Property buffer 285 299 xmlChar *tag; // current tag name libswish3/trunk/src/libswish3/parser.c
r1952 r2009 316 316 if (SWISH_DEBUG == SWISH_DEBUG_PARSER) 317 317 SWISH_DEBUG_MSG("buffer is >>%s<< before flush, word_pos = %d", 318 xmlBufferContent(parse_data-> buf_ptr), parse_data->word_pos);318 xmlBufferContent(parse_data->meta_buf), parse_data->word_pos); 319 319 320 320 /* since we only flush the buffer when metaname changes, and … … 325 325 parse_data->word_pos++; 326 326 327 /* add buf_ptras-is to metanames buffer under current tag.327 /* add meta_buf as-is to metanames buffer under current tag. 328 328 this gives us both tokens and raw text de-tagged but organized by metaname. 329 329 */ 330 330 swish_add_buf_to_nb( parse_data->metanames, 331 331 metaname, 332 parse_data->buf_ptr, (xmlChar*)SWISH_META_CONNECTOR, 0, 1); 332 parse_data->meta_buf, 333 (xmlChar*)SWISH_META_CONNECTOR, 334 0, 335 1); 333 336 334 337 if (parse_data->context_as_meta) … … 341 344 swish_add_buf_to_nb(parse_data->metanames, 342 345 s->temp->name, 343 parse_data->buf_ptr, (xmlChar*)SWISH_META_CONNECTOR, 0, 1); 346 parse_data->meta_buf, 347 (xmlChar*)SWISH_META_CONNECTOR, 348 0, 349 1); 344 350 } 345 351 } … … 349 355 350 356 tokenize( parse_data, 351 (xmlChar *)xmlBufferContent(parse_data-> buf_ptr),352 xmlBufferLength(parse_data-> buf_ptr),357 (xmlChar *)xmlBufferContent(parse_data->meta_buf), 358 xmlBufferLength(parse_data->meta_buf), 353 359 metaname, 354 360 context … … 356 362 } 357 363 358 xmlBufferEmpty(parse_data-> buf_ptr);364 xmlBufferEmpty(parse_data->meta_buf); 359 365 360 366 } … … 517 523 int i; 518 524 xmlChar output[len]; 519 xmlBufferPtr buf = parse_data-> buf_ptr;525 xmlBufferPtr buf = parse_data->meta_buf; 520 526 /* 521 527 * why not wchar_t ? 
len is number of bytes, not number of … … 740 746 ptr->stash = stash; 741 747 742 ptr-> buf_ptr= xmlBufferCreateSize(SWISH_BUFFER_CHUNK_SIZE);748 ptr->meta_buf = xmlBufferCreateSize(SWISH_BUFFER_CHUNK_SIZE); 743 749 ptr->prop_buf = xmlBufferCreateSize(SWISH_BUFFER_CHUNK_SIZE); 744 750 … … 849 855 SWISH_DEBUG_MSG("freeing swish_ParseData xmlBuffer"); 850 856 851 xmlBufferFree( ptr-> buf_ptr);857 xmlBufferFree( ptr->meta_buf ); 852 858 853 859 … … 1271 1277 swish_debug_docinfo(parse_data->docinfo); 1272 1278 SWISH_DEBUG_MSG(" word buffer length: %d bytes", 1273 xmlBufferLength(parse_data-> buf_ptr));1279 xmlBufferLength(parse_data->meta_buf)); 1274 1280 SWISH_DEBUG_MSG(" (%d words)", parse_data->docinfo->nwords); 1275 1281 } … … 1402 1408 swish_debug_docinfo(parse_data->docinfo); 1403 1409 SWISH_DEBUG_MSG(" word buffer length: %d bytes", 1404 xmlBufferLength(parse_data-> buf_ptr));1410 xmlBufferLength(parse_data->meta_buf)); 1405 1411 SWISH_DEBUG_MSG(" (%d words)", parse_data->docinfo->nwords); 1406 1412 } … … 1454 1460 swish_debug_docinfo(parse_data->docinfo); 1455 1461 SWISH_DEBUG_MSG(" word buffer length: %d bytes", 1456 xmlBufferLength(parse_data-> buf_ptr));1462 xmlBufferLength(parse_data->meta_buf)); 1457 1463 SWISH_DEBUG_MSG(" (%d words)", parse_data->docinfo->nwords); 1458 1464 }
