Changeset 2130
- Timestamp:
- 04/15/08 23:12:59 (1 month ago)
- Files:
-
- libswish3/trunk/src/libswish3/libswish3.h (modified) (3 diffs)
- libswish3/trunk/src/libswish3/parser.c (modified) (31 diffs)
- libswish3/trunk/src/t/004-metanames.t (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/src/libswish3/libswish3.h
r2123 r2130 316 316 { 317 317 xmlChar *name; 318 xmlChar *metaname; 319 xmlChar *context; 318 320 struct swish_Tag *next; 319 321 unsigned int n; … … 326 328 unsigned int count; 327 329 char *name; // debugging aid -- name of the stack 328 xmlChar *flat; // all the stack item names as a string for convenience329 330 }; 330 331 … … 356 357 xmlChar *tag; // current tag name 357 358 swish_DocInfo *docinfo; // document-specific properties 358 unsigned intno_index; // toggle flag for special comments359 unsigned intis_html; // shortcut flag for html parser360 unsigned intbump_word; // boolean for moving word position/adding space359 boolean no_index; // toggle flag for special comments 360 boolean is_html; // shortcut flag for html parser 361 boolean bump_word; // boolean for moving word position/adding space 361 362 unsigned int word_pos; // word position in document 362 363 unsigned int offset; // current offset position libswish3/trunk/src/libswish3/parser.c
r2129 r2130 84 84 swish_ParserData *parser_data 85 85 ); 86 static swish_TagStack *push_tag_stack( 86 static void push_tag_stack( 87 swish_TagStack *stack, 88 xmlChar *tag, 89 xmlChar *metaname 90 ); 91 static swish_Tag *pop_tag_stack( 92 swish_TagStack *stack 93 ); 94 static swish_Tag *pop_tag_stack_on_match( 87 95 swish_TagStack *stack, 88 96 xmlChar *tag 89 97 ); 90 static int pop_tag_stack( 91 swish_TagStack *stack 92 ); 93 static xmlChar *pop_tag_stack_on_match( 94 swish_TagStack *stack, 95 xmlChar *tag 98 static void free_swishTag( 99 swish_Tag * st 96 100 ); 97 101 … … 292 296 SWISH_DEBUG_MSG(" tag: %s (%s) ", tag, parser_data->tag); 293 297 if (atts != NULL) { 294 SWISH_DEBUG_MSG(" has attributes [%d]", xmlStrlen((xmlChar *)atts));295 for (i = 0; (atts[i] != NULL); i +=2) {298 SWISH_DEBUG_MSG(" has attributes [%d]", xmlStrlen((xmlChar *)atts)); 299 for (i = 0; (atts[i] != NULL); i += 2) { 296 300 SWISH_DEBUG_MSG(" att: %s=", atts[i]); 297 if (atts[i +1] != NULL) {298 SWISH_DEBUG_MSG(" '%s'", atts[i +1]);301 if (atts[i + 1] != NULL) { 302 SWISH_DEBUG_MSG(" '%s'", atts[i + 1]); 299 303 } 300 304 } … … 403 407 swish_hash_fetch(parser_data->s3->config->stringlists, 404 408 (xmlChar *)SWISH_CLASS_ATTRIBUTES); 405 406 407 409 408 410 for (i = 0; (atts[i] != NULL); i += 2) { 409 411 … … 603 605 if (nb_attributes > 0) { 604 606 atts = swish_xmalloc(((nb_attributes * 2) + 1) * sizeof(xmlChar *)); 605 j =0;607 j = 0; 606 608 for (i = 0; i < nb_attributes * 5; i += 5) { 607 609 atts[j] = (xmlChar *)attributes[i]; 608 610 len = (int)(attributes[i + 4] - attributes[i + 3]); 609 611 if (len > 0) { 610 atts[j +1] = xmlStrsub(attributes[i + 3], 0, len);612 atts[j + 1] = xmlStrsub(attributes[i + 3], 0, len); 611 613 } 612 614 else { 613 615 atts[j] = NULL; 614 616 } 615 j +=2;617 j += 2; 616 618 } 617 619 atts[j] = NULL; 618 620 } 619 621 620 622 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 621 623 SWISH_DEBUG_MSG(" tag: %s nb_attributes %d", localname, nb_attributes); 622 624 if (atts != NULL) { 623 for (i = 0; (atts[i] != NULL); i +=2) {625 for (i = 0; (atts[i] != NULL); i += 2) { 624 626 SWISH_DEBUG_MSG(" att: %s=%s", atts[i], atts[i + 1]); 625 627 //SWISH_DEBUG_MSG(" att: %s=", atts[i++], atts[i] || ""); … … 674 676 SWISH_DEBUG_MSG(" %s = new property", parser_data->tag); 675 677 676 add_stack_to_prop_buf(NULL, parser_data); /* TODO why NULL * here ??*/678 add_stack_to_prop_buf(NULL, parser_data); /* NULL means all properties in the stack are added */ 677 679 xmlBufferEmpty(parser_data->prop_buf); 678 680 679 p arser_data->propstack = push_tag_stack(parser_data->propstack, parser_data->tag);681 push_tag_stack(parser_data->propstack, (xmlChar *)tag, parser_data->tag); 680 682 681 683 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) … … 691 693 692 694 flush_buffer(parser_data, parser_data->metastack->head->name, 693 parser_data->metastack-> flat);694 695 p arser_data->metastack = push_tag_stack(parser_data->metastack, parser_data->tag);695 parser_data->metastack->head->context); 696 697 push_tag_stack(parser_data->metastack, (xmlChar *)tag, parser_data->tag); 696 698 } 697 699 … … 707 709 ) 708 710 { 709 xmlChar *context;710 711 swish_ParserData *parser_data; 712 swish_Tag *st; 711 713 712 714 parser_data = (swish_ParserData *)data; … … 724 726 SWISH_DEBUG_MSG(" endElement(%s) (%s)", (xmlChar *)tag, parser_data->tag); 725 727 726 if ((context = 727 pop_tag_stack_on_match(parser_data->propstack, parser_data->tag)) != NULL) { 728 729 /* 730 * SWISH_DEBUG_MSG("popped %s from propstack", context); 731 */ 728 if ((st = pop_tag_stack_on_match(parser_data->propstack, (xmlChar *)tag)) != NULL) { 729 732 730 add_stack_to_prop_buf(parser_data->tag, parser_data); 733 731 xmlBufferEmpty(parser_data->prop_buf); 734 swish_xfree(context); 735 } 736 737 if ((context = 738 pop_tag_stack_on_match(parser_data->metastack, parser_data->tag)) != NULL) { 739 740 /* 741 * SWISH_DEBUG_MSG("popped %s from metastack", parser_data->tag); 742 */ 743 flush_buffer(parser_data, parser_data->tag, context); 744 swish_xfree(context); 732 free_swishTag(st); 733 } 734 735 if ((st = pop_tag_stack_on_match(parser_data->metastack, (xmlChar *)tag)) != NULL) { 736 737 flush_buffer(parser_data, st->metaname, st->context); 738 free_swishTag(st); 745 739 } 746 740 … … 1035 1029 ptr->metastack->head = NULL; 1036 1030 ptr->metastack->temp = NULL; 1037 ptr->metastack->flat = NULL;1038 1031 ptr->metastack->count = 0; 1039 ptr->metastack = push_tag_stack(ptr->metastack, (xmlChar *)SWISH_DEFAULT_METANAME); 1032 push_tag_stack(ptr->metastack, (xmlChar *)SWISH_DEFAULT_METANAME, 1033 (xmlChar *)SWISH_DEFAULT_METANAME); 1040 1034 1041 1035 ptr->propstack = (swish_TagStack *)swish_xmalloc(sizeof(swish_TagStack)); … … 1043 1037 ptr->propstack->head = NULL; 1044 1038 ptr->propstack->temp = NULL; 1045 ptr->propstack->flat = NULL;1046 1039 ptr->propstack->count = 0; 1047 p tr->propstack = push_tag_stack(ptr->propstack, (xmlChar *)"_");1040 push_tag_stack(ptr->propstack, (xmlChar *)"_", (xmlChar *)"_"); 1048 1041 1049 1042 /* … … 1095 1088 ) 1096 1089 { 1090 swish_Tag *st; 1097 1091 1098 1092 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) … … 1107 1101 * Pop the stacks 1108 1102 */ 1109 while ( pop_tag_stack(ptr->metastack)) {1103 while ((st = pop_tag_stack(ptr->metastack)) != NULL) { 1110 1104 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1111 SWISH_DEBUG_MSG("head of stack is %d %s", ptr->metastack->count, 1112 ptr->metastack->head->name); 1113 1105 SWISH_DEBUG_MSG("%s %d POP %s [%s] [%s]", ptr->metastack->name, 1106 ptr->metastack->count, st->name, st->metaname, st->context); 1107 1108 free_swishTag(st); 1114 1109 } 1115 1110 … … 1119 1114 swish_xfree(ptr->metastack); 1120 1115 1121 while ( pop_tag_stack(ptr->propstack)) {1116 while ((st = pop_tag_stack(ptr->propstack)) != NULL) { 1122 1117 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1123 SWISH_DEBUG_MSG("head of stack is %d %s", ptr->propstack->count, 1124 ptr->propstack->head->name); 1125 1118 SWISH_DEBUG_MSG("%s %d POP %s [%s] [%s]", ptr->propstack->name, 1119 ptr->propstack->count, st->name, st->metaname, st->context); 1120 1121 free_swishTag(st); 1126 1122 } 1127 1123 … … 1962 1958 */ 1963 1959 1964 p arser_data->metastack = push_tag_stack(parser_data->metastack, (xmlChar *)1965 SWISH_DEFAULT_METANAME);1960 push_tag_stack(parser_data->metastack, (xmlChar *)SWISH_DEFAULT_METANAME, 1961 (xmlChar *)SWISH_DEFAULT_METANAME); 1966 1962 1967 1963 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 1968 SWISH_DEBUG_MSG(" stack pushed for %s", parser_data->metastack->flat);1964 SWISH_DEBUG_MSG("%s stack PUSH %s", parser_data->metastack->head->context); 1969 1965 1970 1966 buffer_characters(parser_data, buffer, size); … … 2046 2042 2047 2043 if (context == NULL) 2048 context = parser_data->metastack-> flat;2044 context = parser_data->metastack->head->context; 2049 2045 2050 2046 swish_WordList *tmplist; … … 2127 2123 int i = 0; 2128 2124 2129 SWISH_DEBUG_MSG("%s '%s' stack->count: %d", stack->name, stack->flat, stack->count);2125 SWISH_DEBUG_MSG("%s stack->count: %d", stack->name, stack->count); 2130 2126 2131 2127 for (stack->temp = stack->head; stack->temp != NULL; stack->temp = stack->temp->next) { … … 2172 2168 size = ((xmlStrlen(flat) + (xmlStrlen(stack->temp->name)) * sizeof(xmlChar))) + 2; 2173 2169 tmp = swish_xmalloc(size); 2174 if (s printf((char *)tmp, "%s %s", (char *)flat, (char *)stack->temp->name) > 0) {2170 if (snprintf((char *)tmp, size, "%s %s", (char *)flat, (char *)stack->temp->name) > 0) { 2175 2171 if (flat != NULL) 2176 2172 swish_xfree(flat); … … 2193 2189 ) 2194 2190 { 2195 swish_TagStack *s ;2191 swish_TagStack *stack; 2196 2192 int cleanwsp; 2197 2193 swish_Property *prop; 2198 2194 2199 s = parser_data->propstack;2195 stack = parser_data->propstack; 2200 2196 cleanwsp = 1; 2197 2198 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2199 SWISH_DEBUG_MSG("adding property %s to buffer", tag); 2200 } 2201 2201 2202 2202 if (tag != NULL) { … … 2209 2209 cleanwsp = 0; 2210 2210 2211 /*2212 * SWISH_DEBUG_MSG(" add_stack_to_prop_buf: '%s'",2213 * xmlBufferContent(parser_data->prop_buf));2214 */2215 2216 2211 swish_add_buf_to_nb(parser_data->properties, tag, parser_data->prop_buf, 2217 2212 (xmlChar *)SWISH_PROP_CONNECTOR, cleanwsp, 0); … … 2219 2214 } 2220 2215 2221 for (s->temp = s->head; s->temp != NULL; s->temp = s->temp->next) { 2222 if (xmlStrEqual(s->temp->name, (xmlChar *)"_")) /* top of * the * 2223 * stack * * is * 2224 * just a * * * 2225 * placeholder */ 2216 // add for each member in the stack 2217 // TODO configurable?? 2218 for (stack->temp = stack->head; stack->temp != NULL; stack->temp = stack->temp->next) { 2219 if (xmlStrEqual(stack->temp->name, (xmlChar *)"_")) 2226 2220 continue; 2227 2221 2228 swish_add_buf_to_nb(parser_data->properties, s ->temp->name, parser_data->prop_buf,2222 swish_add_buf_to_nb(parser_data->properties, stack->temp->name, parser_data->prop_buf, 2229 2223 (xmlChar *)SWISH_PROP_CONNECTOR, cleanwsp, 0); 2230 2224 } … … 2232 2226 } 2233 2227 2234 static swish_TagStack * 2228 static void 2229 free_swishTag( 2230 swish_Tag * st 2231 ) 2232 { 2233 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2234 SWISH_DEBUG_MSG(" freeing swishTag: %s %s %s", st->name, st->metaname, st->context); 2235 } 2236 2237 swish_xfree(st->name); 2238 swish_xfree(st->metaname); 2239 swish_xfree(st->context); 2240 swish_xfree(st); 2241 } 2242 2243 static void 2235 2244 push_tag_stack( 2236 2245 swish_TagStack *stack, 2237 xmlChar *tag 2246 xmlChar *tag, 2247 xmlChar *metaname 2238 2248 ) 2239 2249 { … … 2242 2252 2243 2253 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2244 SWISH_DEBUG_MSG(" >>>>>>> before push: tag = '%s'", tag);2254 SWISH_DEBUG_MSG("%s PUSH: tag = '%s'", stack->name, tag); 2245 2255 _debug_stack(stack); 2246 2247 } 2248 2249 /* 2250 * assign this tag to the struct 2251 */ 2256 } 2257 2258 //assign this tag to the struct 2252 2259 thistag->name = swish_xstrdup(tag); 2253 2260 2254 /* 2255 * increment counter 2256 */ 2261 // the metaname (the normalized tag) 2262 thistag->metaname = swish_xstrdup(metaname); 2263 2264 //increment counter 2257 2265 thistag->n = stack->count++; 2258 2266 2259 /* 2260 * add to stack 2261 */ 2267 // push 2262 2268 thistag->next = stack->head; 2263 2269 stack->head = thistag; 2264 2270 2265 /* 2266 * cache the flattened value 2267 */ 2268 if (stack->flat != NULL) 2269 swish_xfree(stack->flat); 2270 2271 stack->flat = flatten_tag_stack(NULL, stack); 2271 // create context 2272 thistag->context = flatten_tag_stack(NULL, stack); 2272 2273 2273 2274 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2274 SWISH_DEBUG_MSG(" >>> stacksize: %d thistag count: %d current head tag = '%s'",2275 stack-> count, thistag->n, stack->head->name);2275 SWISH_DEBUG_MSG("%s size: %d thistag count: %d current head tag = '%s'", 2276 stack->name, stack->count, thistag->n, stack->head->context); 2276 2277 2277 2278 _debug_stack(stack); … … 2279 2280 } 2280 2281 2281 return stack; 2282 } 2283 2284 static int 2282 } 2283 2284 static swish_Tag * 2285 2285 pop_tag_stack( 2286 2286 swish_TagStack *stack 2287 2287 ) 2288 2288 { 2289 // stack is completely empty 2290 if (stack->head == NULL) 2291 return NULL; 2289 2292 2290 2293 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2291 SWISH_DEBUG_MSG(" pop_tag_stack: %s from %s", stack->head->name, stack->name);2294 SWISH_DEBUG_MSG("%s POP: %s", stack->name, stack->head->name); 2292 2295 _debug_stack(stack); 2293 2296 … … 2296 2299 if (stack->count > 1) { 2297 2300 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2298 SWISH_DEBUG_MSG(" >>> %d: popping '%s' from tagstack <<<", stack->head->n,2301 SWISH_DEBUG_MSG("%s %d: popping '%s'", stack->name, stack->head->n, 2299 2302 stack->head->name); 2300 2303 … … 2303 2306 stack->temp = stack->head; 2304 2307 stack->head = stack->head->next; 2305 2306 /*2307 * free the memory for the popped meta2308 */2309 swish_xfree(stack->temp->name);2310 stack->temp->n = 0;2311 swish_xfree(stack->temp);2312 2313 2308 stack->count--; 2314 2309 … … 2316 2311 else { 2317 2312 2313 // the stack has only one member 2314 2318 2315 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2319 SWISH_DEBUG_MSG 2320 (" >>> %d: popping '%s' from tagstack will leave stack empty (flat: %s) <<<",2321 stack->head->n, stack->head->name, stack->flat);2316 SWISH_DEBUG_MSG("%s %d: popping '%s' will leave stack empty [%s]", 2317 stack->name, stack->head->n, stack->head->name, 2318 stack->head->context); 2322 2319 2323 2320 } … … 2325 2322 stack->temp = stack->head; 2326 2323 stack->head = NULL; 2327 swish_xfree(stack->flat);2328 stack->flat = NULL;2329 2330 swish_xfree(stack->temp->name);2331 stack->temp->n = 0;2332 swish_xfree(stack->temp);2333 2324 stack->count--; 2334 2325 2335 return 0; 2336 } 2337 2338 /* 2339 * cache the flattened value 2340 */ 2341 if (stack->flat != NULL) 2342 swish_xfree(stack->flat); 2343 2344 stack->flat = flatten_tag_stack(NULL, stack); 2326 } 2345 2327 2346 2328 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2347 SWISH_DEBUG_MSG(" >> stack size = %d head of stack = %s <<", stack->count, 2348 stack->head->name); 2349 _debug_stack(stack); 2350 } 2351 2352 return stack->count; 2329 SWISH_DEBUG_MSG("%s stack count = %d", stack->name, stack->count); 2330 } 2331 2332 return stack->temp; 2353 2333 2354 2334 } 2355 2335 2356 2336 /* 2357 * returns previous ->flat if the current tag matches the top of the stack 2358 * and gets popped 2337 * returns top of the stack if the current tag matches. 2359 2338 */ 2360 static xmlChar*2339 static swish_Tag * 2361 2340 pop_tag_stack_on_match( 2362 2341 swish_TagStack *stack, … … 2365 2344 { 2366 2345 2367 xmlChar *prev_flat;2368 2369 prev_flat = swish_xstrdup(stack->flat);2346 swish_Tag *st; 2347 2348 st = NULL; 2370 2349 2371 2350 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2372 SWISH_DEBUG_MSG("pop_tag_stack_on_match() for %s", stack->name); 2373 SWISH_DEBUG_MSG("comparing '%s' against '%s'", tag, stack->head->name); 2351 SWISH_DEBUG_MSG("%s: POP if %s matches %s", stack->name, tag, stack->head->name); 2374 2352 _debug_stack(stack); 2375 2353 } … … 2378 2356 2379 2357 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2380 SWISH_DEBUG_MSG 2381 (" >>>>>>>>>>>>>>>>>>> current tag = '%s' matches top of tagstack", tag); 2358 SWISH_DEBUG_MSG("%s POP '%s' == head", stack->name, tag); 2382 2359 2383 2360 } … … 2386 2363 * more than default meta 2387 2364 */ 2388 if ( pop_tag_stack(stack)) {2365 if ((st = pop_tag_stack(stack)) != NULL) { 2389 2366 2390 2367 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2391 SWISH_DEBUG_MSG("stack popped. tag = %s stack->head = %s", tag, 2392 stack->head->name); 2368 SWISH_DEBUG_MSG("%s POPPED. tag = %s st->name = %s", stack->name, 2369 tag, st->name); 2370 2393 2371 _debug_stack(stack); 2394 2372 } … … 2397 2375 2398 2376 /* 2399 * only tag on stack 2377 * only tag on stack. TODO do we ever get here? 2400 2378 */ 2401 2379 else if (stack->count) { 2402 2380 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 2403 SWISH_DEBUG_MSG(" using stack->head %s", stack->head->name); 2404 2405 } 2406 2407 return prev_flat; 2381 SWISH_DEBUG_MSG("%s head %s", stack->name, stack->head->name); 2382 2383 } 2384 else { 2385 SWISH_CROAK("%s stack was empty", stack->name); 2386 } 2387 2408 2388 } 2409 2389 else { 2390 2410 2391 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) 2411 SWISH_DEBUG_MSG("no match for '%s'", tag); 2412 2413 } 2414 2415 swish_xfree(prev_flat); 2416 2417 return 0; 2418 } 2392 SWISH_DEBUG_MSG("%s: no match for '%s'", stack->name, tag); 2393 2394 } 2395 2396 if (SWISH_DEBUG & SWISH_DEBUG_PARSER) { 2397 if (st != NULL) 2398 SWISH_DEBUG_MSG("POP on match returning: %s", st->name); 2399 else 2400 SWISH_DEBUG_MSG("POP on match returning null"); 2401 } 2402 2403 return st; 2404 } libswish3/trunk/src/t/004-metanames.t
r2127 r2130 27 27 ); 28 28 29 like( $buf, qr!<swishtitle>mytitle here</swishtitle>!, "swishtitle" );30 like( $buf, qr!<mytag1> substr: \s+some text!s, "mytag1" );31 like( $buf, qr!<mytag1> substr: \s+yet again\s+and again!s, "mytag1 again" );32 like( $buf, qr!<mytag3.foo> substr: \s+blah blah!s, "mytag3.foo" );33 like( $buf, qr!<mytag3> substr: \s+foo bar!s, "mytag3" );29 like( $buf, qr!<swishtitle>mytitle here</swishtitle>!, "swishtitle" ); 30 like( $buf, qr!<mytag1> substr:\s+some text!s, "mytag1" ); 31 like( $buf, qr!<mytag1> substr:\s+yet again\s+and again!s, "mytag1 again" ); 32 like( $buf, qr!<mytag3.foo> substr:\s+blah blah!s, "mytag3.foo" ); 33 like( $buf, qr!<mytag3> substr:\s+foo bar!s, "mytag3" );
