Show
Ignore:
Timestamp:
04/28/08 22:02:04 (4 months ago)
Author:
karpet
Message:

Alternate utf8-savvy tokenizer with iterator. Initial naive benchmark shows it is about as fast, with far fewer malloc/free calls. Could likely speed it up some by refactoring how "context" is stored internally.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • libswish3/trunk/src/swish_lint.c

    r2131 r2140  
    5555    {"debug", required_argument, 0, 'd'}, 
    5656    {"help", no_argument, 0, 'h'}, 
     57    {"tokenize3", no_argument, 0, 't'}, 
    5758    {0, 0, 0, 0} 
    5859}; 
     
    9293    printf("\tSWISH_DEBUG_NAMEDBUFFER 64\n"); 
    9394    printf("Set SWISH_PARSER_WARNINGS=1 to see libxml2 errors and warnings\n"); 
     95    printf("Set SWISH_WARNINGS=0 to turn off libswish3 warnings\n"); 
    9496    printf("stdin headers:\n"); 
    9597    printf("\tContent-Length\n"); 
     
    125127        swish_debug_docinfo(parser_data->docinfo); 
    126128 
    127     if (SWISH_DEBUG & SWISH_DEBUG_WORDLIST) 
     129    if (SWISH_DEBUG & SWISH_DEBUG_WORDLIST) { 
     130      if (parser_data->s3->analyzer->tokenlist) { 
     131        swish_debug_token_list(parser_data->token_iterator); 
     132      } 
     133      else { 
    128134        swish_debug_wordlist(parser_data->wordlist); 
     135      } 
     136    } 
    129137 
    130138    if (SWISH_DEBUG & SWISH_DEBUG_NAMEDBUFFER) { 
     
    145153    int option_index; 
    146154    int files; 
    147     int overwrite; 
    148155    char *etime; 
    149156    double start_time; 
     
    153160    option_index = 0; 
    154161    files = 0; 
    155     overwrite = 0; 
    156162    start_time = swish_time_elapsed(); 
    157163    s3 = swish_init_swish3(&handler, NULL); 
    158164 
    159     while ((ch = getopt_long(argc, argv, "c:d:f:h", longopts, &option_index)) != -1) { 
     165    while ((ch = getopt_long(argc, argv, "c:d:f:ht", longopts, &option_index)) != -1) { 
    160166 
    161167        switch (ch) { 
     
    184190            SWISH_DEBUG = swish_string_to_int(optarg); 
    185191            break; 
    186  
    187         case 'o': 
    188             overwrite = 1; 
    189             break; 
    190  
     192             
     193        case 't': 
     194            s3->analyzer->tokenlist = 1; 
     195            break; 
     196             
    191197        case '?': 
    192198        case 'h':