Changeset 2178

Show
Ignore:
Timestamp:
09/26/08 23:59:57 (2 months ago)
Author:
karpet
Message:

Some versions of the HTML parser were passing through extra whitespace.
This seems to be a libxml2-specific issue. In any case, added a new
whitespace check in both add-to-buf methods and in the Perl bindings
(the latter is where t/20metanames.t was failing due to extra whitespace).

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • libswish3/trunk/bindings/perl/XS/Property.xs

    r2176 r2178  
    9595        if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) { 
    9696            warn("DESTROYing swish_Property object %s  [0x%x] [ref_cnt = %d]",  
    97                 SvPV(ST(0), PL_na), (int)self, self->ref_cnt); 
     97                SvPV(ST(0), PL_na), (long int)self, self->ref_cnt); 
    9898        } 
    9999         
  • libswish3/trunk/bindings/perl/t/20metanames.t

    r2172 r2178  
    3232    #diag(dump($data->metanames)); 
    3333 
    34     #$data->wordlist->debug; 
     34    #$data->tokens->debug; 
    3535 
    3636} 
  • libswish3/trunk/bindings/perl/xs_helpers.c

    r2176 r2178  
    565565    int bump            = strlen(SWISH_TOKENPOS_BUMPER); 
    566566    int len; 
     567 
     568    //warn("%s nb_content: '%s'\n", key, str); 
    567569         
    568570    /* analogous to @strings = split(/SWISH_TOKENPOS_BUMPER/, str) */ 
    569571    while((tmp = xmlStrstr(str, (xmlChar*)SWISH_TOKENPOS_BUMPER)) != NULL) 
    570572    { 
     573        //warn("%s split: '%s'\n", key, str); 
    571574        len = tmp - str; 
    572         if(len) 
     575        if(len && !swish_str_all_ws_len((xmlChar*)str, len)) { 
    573576            av_push(strings, newSVpvn((char*)str, len)); 
    574              
     577        } 
    575578        str = tmp + bump;  /* move the pointer up */ 
    576579    } 
    577580     
    578581    /* no match and/or last match */ 
    579     if (!xmlStrstr(str, (xmlChar*)SWISH_TOKENPOS_BUMPER)) { 
     582    if ( !xmlStrstr(str, (xmlChar*)SWISH_TOKENPOS_BUMPER)  
     583      && strlen((char*)str)  
     584      && !swish_str_all_ws((xmlChar*)str) 
     585    ) { 
    580586        av_push(strings, newSVpvn((char*)str, strlen((char*)str))); 
    581587    } 
  • libswish3/trunk/src/libswish3/config.c

    r2141 r2178  
    155155    if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) { 
    156156        SWISH_DEBUG_MSG("freeing config"); 
    157         SWISH_DEBUG_MSG("ptr addr: 0x%x  %d", (int)config, (int)config); 
     157        SWISH_DEBUG_MSG("ptr addr: 0x%x  %d", (long int)config, (long int)config); 
    158158        swish_mem_debug(); 
    159159    } 
     
    234234 
    235235    if (SWISH_DEBUG & SWISH_DEBUG_MEMORY) { 
    236         SWISH_DEBUG_MSG("config ptr 0x%x", (int)config); 
     236        SWISH_DEBUG_MSG("config ptr 0x%x", (long int)config); 
    237237    } 
    238238 
     
    412412{ 
    413413    SWISH_DEBUG_MSG("config->ref_cnt = %d", config->ref_cnt); 
    414     SWISH_DEBUG_MSG("config->stash address = 0x%x  %d", (int)config->stash, 
    415                     (int)config->stash); 
    416     SWISH_DEBUG_MSG("ptr addr: 0x%x  %d", (int)config, (int)config); 
     414    SWISH_DEBUG_MSG("config->stash address = 0x%x  %d", (long int)config->stash, 
     415                    (long int)config->stash); 
     416    SWISH_DEBUG_MSG("ptr addr: 0x%x  %d", (long int)config, (long int)config); 
    417417 
    418418    xmlHashScan(config->misc, (xmlHashScanner)config_printer, "misc conf"); 
  • libswish3/trunk/src/libswish3/libswish3.h

    r2176 r2178  
    468468xmlChar *           swish_str_skip_ws(xmlChar *s); 
    469469void                swish_str_trim_ws(xmlChar *string); 
    470 int                 swish_str_all_ws(xmlChar * s); 
     470boolean             swish_str_all_ws(xmlChar * s); 
     471boolean             swish_str_all_ws_len(xmlChar * s, int len); 
    471472void                swish_debug_wchars( const wchar_t * widechars ); 
    472473int                 swish_wchar_t_comp(const void *s1, const void *s2); 
  • libswish3/trunk/src/libswish3/namedbuffer.c

    r2150 r2178  
    171171    xmlBufferPtr buf = swish_hash_fetch(nb->hash, name); 
    172172 
     173/* if the str is nothing but whitespace, skip it */ 
     174    if (swish_str_all_ws(str)) { 
     175        if (SWISH_DEBUG & SWISH_DEBUG_NAMEDBUFFER) 
     176            SWISH_DEBUG_MSG("skipping all whitespace string '%s'", str); 
     177 
     178        return; 
     179    } 
     180 
    173181    if (!buf) { 
    174182        if (autovivify) { 
     
    196204    } 
    197205    else { 
    198 /* SWISH_DEBUG_MSG("adding '%s' to buffer '%s'", str, name); */ 
     206        if (SWISH_DEBUG & SWISH_DEBUG_NAMEDBUFFER)  
     207            SWISH_DEBUG_MSG("adding '%s' to buffer '%s'", str, name);  
    199208        swish_append_buffer(buf, str, len); 
    200209    } 
  • libswish3/trunk/src/libswish3/string.c

    r2141 r2178  
    541541} 
    542542 
    543 int 
     543boolean 
    544544swish_str_all_ws( 
    545545    xmlChar *s 
    546546) 
    547547{ 
    548     int len, i; 
    549     len = xmlStrlen(s); 
     548    return swish_str_all_ws_len(s, xmlStrlen(s)); 
     549} 
     550 
     551boolean 
     552swish_str_all_ws_len( 
     553    xmlChar * s,  
     554    int len 
     555) 
     556{ 
     557    int i; 
    550558    for (i = 0; i < len; i++) { 
    551559        if (!isspace((int)s[i])) { 
     
    555563    return 1; 
    556564} 
     565 
    557566 
    558567void