root/libswish3/trunk/src/libswish3/config.c

Revision 2178, 16.1 kB (checked in by karpet, 2 months ago)

Some versions of the HTML parser were passing through extra whitespace;
this seems to be a libxml2-specific issue. In any case, added a new
whitespace check in both add-to-buf methods and in the Perl bindings
(the latter because t/20-metanames.t was failing due to the extra whitespace).

Line 
1
2 /*
3  * This file is part of libswish3
4  * Copyright (C) 2007 Peter Karman
5  *
6  *  libswish3 is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  libswish3 is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with libswish3; if not, write to the Free Software
18  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 */
20
21 #include <sys/param.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdarg.h>
25 #include <string.h>
26 #include <locale.h>
27 #include <err.h>
28
29 #include "libswish3.h"
30
31 extern int SWISH_DEBUG;
32
33 static void free_string(
34     xmlChar *payload,
35     xmlChar *key
36 );
37 static void free_props(
38     swish_Property *prop,
39     xmlChar *propname
40 );
41 static void free_metas(
42     swish_MetaName *meta,
43     xmlChar *metaname
44 );
45 static void config_printer(
46     xmlChar *val,
47     xmlChar *str,
48     xmlChar *key
49 );
50 static void stringlist_printer(
51     swish_StringList *strlist,
52     xmlChar *str,
53     xmlChar *key
54 );
55 static void property_printer(
56     swish_Property *prop,
57     xmlChar *str,
58     xmlChar *propname
59 );
60 static void metaname_printer(
61     swish_MetaName *meta,
62     xmlChar *str,
63     xmlChar *metaname
64 );
65 static void copy_property(
66     swish_Property *prop2,
67     xmlHashTablePtr props1,
68     xmlChar *prop2name
69 );
70 static void merge_properties(
71     xmlHashTablePtr props1,
72     xmlHashTablePtr props2
73 );
74 static void copy_metaname(
75     swish_MetaName *meta2,
76     xmlHashTablePtr metas1,
77     xmlChar *meta2name
78 );
79 static void merge_metanames(
80     xmlHashTablePtr metas1,
81     xmlHashTablePtr metas2
82 );
83 static void
84 free_stringlist(
85     swish_StringList *strlist,
86     xmlChar *key
87 );
88
89 static void
90 free_string(
91     xmlChar *payload,
92     xmlChar *key
93 )
94 {
95     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG)
96         SWISH_DEBUG_MSG("   freeing config %s => %s", key, payload);
97
98     swish_xfree(payload);
99 }
100
101 static void
102 free_stringlist(
103     swish_StringList *strlist,
104     xmlChar *key
105 )
106 {
107     int i;
108     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
109         SWISH_DEBUG_MSG("   freeing config->stringlists %s [%d strings]", key, strlist->n);
110         for(i=0; i<strlist->n; i++) {
111             SWISH_DEBUG_MSG("     string: %s", strlist->word[i]);
112         }
113     }
114
115     swish_free_stringlist(strlist);
116 }
117
118 static void
119 free_props(
120     swish_Property *prop,
121     xmlChar *propname
122 )
123 {
124     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
125         SWISH_DEBUG_MSG("   freeing config->prop %s", propname);
126         swish_debug_property((swish_Property *)prop);
127     }
128     prop->ref_cnt--;
129     if (prop->ref_cnt < 1) {
130         swish_free_property(prop);
131     }
132 }
133
134 static void
135 free_metas(
136     swish_MetaName *meta,
137     xmlChar *metaname
138 )
139 {
140     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
141         SWISH_DEBUG_MSG("   freeing config->meta %s", metaname);
142         swish_debug_metaname((swish_MetaName *)meta);
143     }
144     meta->ref_cnt--;
145     if (meta->ref_cnt < 1) {
146         swish_free_metaname(meta);
147     }
148 }
149
150 void
151 swish_free_config(
152     swish_Config *config
153 )
154 {
155     if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) {
156         SWISH_DEBUG_MSG("freeing config");
157         SWISH_DEBUG_MSG("ptr addr: 0x%x  %d", (long int)config, (long int)config);
158         swish_mem_debug();
159     }
160
161     xmlHashFree(config->misc, (xmlHashDeallocator)free_string);
162     xmlHashFree(config->properties, (xmlHashDeallocator)free_props);
163     xmlHashFree(config->metanames, (xmlHashDeallocator)free_metas);
164     xmlHashFree(config->tag_aliases, (xmlHashDeallocator)free_string);
165     xmlHashFree(config->parsers, (xmlHashDeallocator)free_string);
166     xmlHashFree(config->mimes, (xmlHashDeallocator)free_string);
167     xmlHashFree(config->index, (xmlHashDeallocator)free_string);
168     xmlHashFree(config->stringlists, (xmlHashDeallocator)free_stringlist);
169     swish_free_config_flags(config->flags);
170
171     if (config->ref_cnt != 0) {
172         SWISH_WARN("config ref_cnt != 0: %d", config->ref_cnt);
173     }
174
175     if (config->stash != NULL) {
176         SWISH_WARN("possible memory leak: config->stash was not freed");
177     }
178
179     swish_xfree(config);
180 }
181
182 swish_ConfigFlags *
183 swish_init_config_flags(
184 )
185 {
186     swish_ConfigFlags *flags;
187     flags = swish_xmalloc(sizeof(swish_ConfigFlags));
188     flags->tokenize = 1;
189     flags->context_as_meta = 0;
190     flags->meta_ids = swish_init_hash(8);
191     flags->prop_ids = swish_init_hash(8);
192     //flags->contexts = swish_init_hash(8);
193
194     return flags;
195 }
196
197 void
198 swish_free_config_flags(
199     swish_ConfigFlags * flags
200 )
201 {
202     /*
203        these hashes are for convenience and are really freed in swish_free_config()
204      */
205     xmlHashFree(flags->meta_ids, NULL);
206     xmlHashFree(flags->prop_ids, NULL);
207     swish_xfree(flags);
208 }
209
210 /* init config object */
211 swish_Config *
212 swish_init_config(
213 )
214 {
215     swish_Config *config;
216
217     if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) {
218         SWISH_DEBUG_MSG("init config");
219     }
220
221 /* the hashes will automatically grow as needed so we init with sane starting size */
222     config = swish_xmalloc(sizeof(swish_Config));
223     config->flags = swish_init_config_flags();
224     config->misc = swish_init_hash(8);
225     config->metanames = swish_init_hash(8);
226     config->properties = swish_init_hash(8);
227     config->parsers = swish_init_hash(8);
228     config->index = swish_init_hash(8);
229     config->tag_aliases = swish_init_hash(8);
230     config->stringlists = swish_init_hash(8);
231     config->mimes = NULL;
232     config->ref_cnt = 0;
233     config->stash = NULL;
234
235     if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) {
236         SWISH_DEBUG_MSG("config ptr 0x%x", (long int)config);
237     }
238
239     return config;
240
241 }
242
243 void
244 swish_config_set_default(
245     swish_Config *config
246 )
247 {
248     swish_Property *tmpprop;
249     swish_MetaName *tmpmeta;
250     xmlChar *tmpbuf;
251
252     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG)
253         SWISH_DEBUG_MSG("setting default config");
254
255 /* we xstrdup a lot in order to consistently free in swish_free_config() */
256
257 /* MIME types */
258     config->mimes = swish_mime_hash();
259
260     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG)
261         SWISH_DEBUG_MSG("mime hash set");
262
263 /* metanames */
264     // default
265     tmpmeta = swish_init_metaname(swish_xstrdup((xmlChar *)SWISH_DEFAULT_METANAME));
266     tmpmeta->ref_cnt++;
267     tmpmeta->id = SWISH_META_DEFAULT_ID;
268     tmpbuf = swish_int_to_string(SWISH_META_DEFAULT_ID);
269     swish_hash_add(config->flags->meta_ids, tmpbuf, tmpmeta);
270     swish_hash_add(config->metanames, (xmlChar *)SWISH_DEFAULT_METANAME, tmpmeta);
271     swish_xfree(tmpbuf);
272     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG)
273         SWISH_DEBUG_MSG("swishdefault metaname set");
274
275     // title
276     tmpmeta = swish_init_metaname(swish_xstrdup((xmlChar *)SWISH_TITLE_METANAME));
277     tmpmeta->ref_cnt++;
278     tmpmeta->id = SWISH_META_TITLE_ID;
279     tmpbuf = swish_int_to_string(SWISH_META_TITLE_ID);
280     swish_hash_add(config->flags->meta_ids, tmpbuf, tmpmeta);
281     swish_hash_add(config->metanames, (xmlChar *)SWISH_TITLE_METANAME, tmpmeta);
282     swish_xfree(tmpbuf);
283     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG)
284         SWISH_DEBUG_MSG("swishtitle metaname set");
285
286 /* properties */
287     // description
288     tmpprop = swish_init_property(swish_xstrdup((xmlChar *)SWISH_PROP_DESCRIPTION));
289     tmpprop->ref_cnt++;
290     tmpprop->id = SWISH_PROP_DESCRIPTION_ID;
291     swish_hash_add(config->properties, (xmlChar *)SWISH_PROP_DESCRIPTION, tmpprop);
292     tmpbuf = swish_int_to_string(SWISH_PROP_DESCRIPTION_ID);
293     swish_hash_add(config->flags->prop_ids, tmpbuf, tmpprop);
294     swish_xfree(tmpbuf);
295
296     // title
297     tmpprop = swish_init_property(swish_xstrdup((xmlChar *)SWISH_PROP_TITLE));
298     tmpprop->ref_cnt++;
299     tmpprop->id = SWISH_PROP_TITLE_ID;
300     swish_hash_add(config->properties, (xmlChar *)SWISH_PROP_TITLE, tmpprop);
301     tmpbuf = swish_int_to_string(SWISH_PROP_TITLE_ID);
302     swish_hash_add(config->flags->prop_ids, tmpbuf, tmpprop);
303     swish_xfree(tmpbuf);
304
305 /* parsers */
306     swish_hash_add(config->parsers, (xmlChar *)"text/plain",
307                    swish_xstrdup((xmlChar *)SWISH_PARSER_TXT));
308     swish_hash_add(config->parsers, (xmlChar *)"text/xml",
309                    swish_xstrdup((xmlChar *)SWISH_PARSER_XML));
310     swish_hash_add(config->parsers, (xmlChar *)"text/html",
311                    swish_xstrdup((xmlChar *)SWISH_PARSER_HTML));
312     swish_hash_add(config->parsers, (xmlChar *)SWISH_DEFAULT_PARSER,
313                    swish_xstrdup((xmlChar *)SWISH_DEFAULT_PARSER_TYPE));
314
315 /* index */
316     swish_hash_add(config->index, (xmlChar *)SWISH_INDEX_FORMAT,
317                    swish_xstrdup((xmlChar *)SWISH_INDEX_FILEFORMAT));
318     swish_hash_add(config->index, (xmlChar *)SWISH_INDEX_NAME,
319                    swish_xstrdup((xmlChar *)SWISH_INDEX_FILENAME));
320     swish_hash_add(config->index, (xmlChar *)SWISH_INDEX_LOCALE,
321                    swish_xstrdup((xmlChar *)setlocale(LC_ALL, "")));
322
323 /* aliases: other names a tag might be known as, for matching properties and
324      * metanames */
325     swish_hash_add(config->tag_aliases, (xmlChar *)SWISH_TITLE_TAG,
326                    swish_xstrdup((xmlChar *)SWISH_TITLE_METANAME));
327     swish_hash_add(config->tag_aliases, (xmlChar *)SWISH_BODY_TAG,
328                    swish_xstrdup((xmlChar *)SWISH_PROP_DESCRIPTION));
329
330     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
331         SWISH_DEBUG_MSG("config_set_default done");
332         swish_debug_config(config);
333     }
334
335 }
336
337 swish_Config *
338 swish_add_config(
339     xmlChar *conf,
340     swish_Config *config
341 )
342 {
343
344     config = swish_parse_config(conf, config);
345     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG)
346         swish_debug_config(config);
347
348     return config;
349
350 }
351
352 swish_Config *
353 swish_parse_config(
354     xmlChar *conf,
355     swish_Config *config
356 )
357 {
358     swish_merge_config_with_header((char *)conf, config);
359     return config;
360 }
361
362 static void
363 config_printer(
364     xmlChar *val,
365     xmlChar *str,
366     xmlChar *key
367 )
368 {
369     SWISH_DEBUG_MSG(" %s:  %s => %s", str, key, val);
370 }
371
372 static void
373 stringlist_printer(
374     swish_StringList *strlist,
375     xmlChar *str,
376     xmlChar *key
377 )
378 {
379     int i;
380     for(i=0; i<strlist->n; i++) {
381         SWISH_DEBUG_MSG(" %s: %s => %s", str, key, strlist->word[i]);
382     }
383 }
384
385 static void
386 property_printer(
387     swish_Property *prop,
388     xmlChar *str,
389     xmlChar *propname
390 )
391 {
392     SWISH_DEBUG_MSG(" %s:  %s =>", str, propname);
393     swish_debug_property(prop);
394 }
395
396 static void
397 metaname_printer(
398     swish_MetaName *meta,
399     xmlChar *str,
400     xmlChar *metaname
401 )
402 {
403     SWISH_DEBUG_MSG(" %s:  %s =>", str, metaname);
404     swish_debug_metaname(meta);
405 }
406
407 /* PUBLIC */
408 void
409 swish_debug_config(
410     swish_Config *config
411 )
412 {
413     SWISH_DEBUG_MSG("config->ref_cnt = %d", config->ref_cnt);
414     SWISH_DEBUG_MSG("config->stash address = 0x%x  %d", (long int)config->stash,
415                     (long int)config->stash);
416     SWISH_DEBUG_MSG("ptr addr: 0x%x  %d", (long int)config, (long int)config);
417
418     xmlHashScan(config->misc, (xmlHashScanner)config_printer, "misc conf");
419     xmlHashScan(config->stringlists, (xmlHashScanner)stringlist_printer, "stringlists");
420     xmlHashScan(config->properties, (xmlHashScanner)property_printer, "properties");
421     xmlHashScan(config->metanames, (xmlHashScanner)metaname_printer, "metanames");
422     xmlHashScan(config->parsers, (xmlHashScanner)config_printer, "parsers");
423     xmlHashScan(config->mimes, (xmlHashScanner)config_printer, "mimes");
424     xmlHashScan(config->index, (xmlHashScanner)config_printer, "index");
425     xmlHashScan(config->tag_aliases, (xmlHashScanner)config_printer, "tag_aliases");
426 }
427
428 static void
429 copy_property(
430     swish_Property *prop2,
431     xmlHashTablePtr props1,
432     xmlChar *prop2name
433 )
434 {
435     swish_Property *prop1;
436
437     if (swish_hash_exists(props1, prop2name)) {
438         prop1 = swish_hash_fetch(props1, prop2name);
439         if (prop1->name != NULL) {
440             swish_xfree(prop1->name);
441             prop1->name = swish_xstrdup(prop2->name);
442         }
443     }
444     else {
445         prop1 = swish_init_property(swish_xstrdup(prop2name));
446         prop1->ref_cnt++;
447         swish_hash_add(props1, prop1->name, prop1);
448     }
449 /*
450     SWISH_DEBUG_MSG("%s prop1->id = %d    %s prop2->id = %d",
451                     prop1->name, prop1->id, prop2->name, prop2->id);
452  */
453     prop1->id = prop2->id;       
454     prop1->ignore_case = prop2->ignore_case;
455     prop1->type = prop2->type;
456     prop1->verbatim = prop2->verbatim;
457     if (prop1->alias_for != NULL) {
458         swish_xfree(prop2->alias_for);
459     }
460     if (prop2->alias_for != NULL) {
461         prop1->alias_for = swish_xstrdup(prop2->alias_for);
462     }
463     prop1->max = prop2->max;
464     prop1->sort = prop2->sort;
465
466 }
467
468 static void
469 merge_properties(
470     xmlHashTablePtr props1,
471     xmlHashTablePtr props2
472 )
473 {
474     xmlHashScan(props2, (xmlHashScanner)copy_property, props1);
475 }
476
477 static void
478 copy_metaname(
479     swish_MetaName *meta2,
480     xmlHashTablePtr metas1,
481     xmlChar *meta2name
482 )
483 {
484     swish_MetaName *meta1;
485    
486     if (swish_hash_exists(metas1, meta2name)) {
487         meta1 = swish_hash_fetch(metas1, meta2name);
488         if (meta1->name != NULL) {
489             swish_xfree(meta1->name);
490             meta1->name = swish_xstrdup(meta2->name);
491         }   
492     }
493     else {
494         meta1 = swish_init_metaname(swish_xstrdup(meta2name));
495         meta1->ref_cnt++;
496         swish_hash_add(metas1, meta1->name, meta1);
497     }
498 /*     
499     SWISH_DEBUG_MSG("%s meta1->id = %d    %s meta2->id = %d",
500                     meta1->name, meta1->id, meta2->name, meta2->id);
501  */
502     // only change id if meta2->id is not already spoken for.
503     meta1->id = meta2->id;
504     meta1->bias = meta2->bias;
505     if (meta1->alias_for != NULL) {
506         swish_xfree(meta1->alias_for);
507     }
508     if (meta2->alias_for != NULL) {
509         meta1->alias_for = swish_xstrdup(meta2->alias_for);
510     }
511
512 }
513
514 static void
515 merge_metanames(
516     xmlHashTablePtr metas1,
517     xmlHashTablePtr metas2
518 )
519 {
520     xmlHashScan(metas2, (xmlHashScanner)copy_metaname, metas1);
521 }
522
523 static void
524 copy_strlist(
525     swish_StringList *strlist2,
526     xmlHashTablePtr strlists1,
527     xmlChar *key
528 )
529 {
530     swish_StringList *strlist1;
531     if (swish_hash_exists(strlists1, key)) {
532         strlist1 = swish_hash_fetch(strlists1, key);
533         swish_merge_stringlists(strlist2, strlist1);
534     }
535     else {
536         strlist1 = swish_copy_stringlist(strlist2);
537         swish_hash_add(strlists1, key, strlist1);
538     }
539 }
540
541 static void
542 merge_stringlists(
543     xmlHashTablePtr strlists1,
544     xmlHashTablePtr strlists2
545 )
546 {
547     xmlHashScan(strlists2, (xmlHashScanner)copy_strlist, strlists1);
548 }
549
550 void
551 swish_config_merge(
552     swish_Config *config1,
553     swish_Config *config2
554 )
555 {
556
557 /* values in config2 override and are set in config1 */
558     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
559         SWISH_DEBUG_MSG("merge properties");
560     }
561     merge_properties(config1->properties, config2->properties);
562
563     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
564         SWISH_DEBUG_MSG("merge metanames");
565     }
566     merge_metanames(config1->metanames, config2->metanames);
567
568     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
569         SWISH_DEBUG_MSG("merge parsers");
570     }
571     swish_hash_merge(config1->parsers, config2->parsers);
572
573     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
574         SWISH_DEBUG_MSG("merge mimes");
575     }
576     swish_hash_merge(config1->mimes, config2->mimes);
577
578     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
579         SWISH_DEBUG_MSG("merge index");
580     }
581     swish_hash_merge(config1->index, config2->index);
582
583     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
584         SWISH_DEBUG_MSG("merge tag_aliases");
585     }
586     swish_hash_merge(config1->tag_aliases, config2->tag_aliases);
587
588     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
589         SWISH_DEBUG_MSG("merge misc");
590     }
591     swish_hash_merge(config1->misc, config2->misc);
592    
593     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
594         SWISH_DEBUG_MSG("merge stringlists");
595     }
596     merge_stringlists(config1->stringlists, config2->stringlists);
597
598     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
599         SWISH_DEBUG_MSG("merge complete");
600     }
601
602 /* set flags */
603 /* TODO pull these settings from config proper. but where in process? */
604     config1->flags->tokenize = config2->flags->tokenize;
605     config1->flags->context_as_meta = config2->flags->context_as_meta;
606
607     if (SWISH_DEBUG & SWISH_DEBUG_CONFIG) {
608         SWISH_DEBUG_MSG("flags set");
609     }
610
611 }
Note: See TracBrowser for help on using the browser.