Changeset 2155
- Timestamp:
- 09/18/08 23:51:41 (4 months ago)
- Files:
-
- libswish3/trunk/src/libswish3/libswish3.h (modified) (1 diff)
- libswish3/trunk/src/libswish3/parser.c (modified) (2 diffs)
- libswish3/trunk/src/libswish3/tokenizer.c (modified) (2 diffs)
- libswish3/trunk/src/test.sh (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
libswish3/trunk/src/libswish3/libswish3.h
r2153 r2155
 144  144  #define SWISH_DEFAULT_ENCODING "UTF-8"
 145  145  #define SWISH_LOCALE "en_US.UTF-8"
 146  146  #define SWISH_ENCODING_ERROR 100
 147  147
 148  148  /* debugging levels */
libswish3/trunk/src/libswish3/parser.c
r2153 r2155
1944 1944
1945 1945  if (parser_data->docinfo->encoding != (xmlChar *)SWISH_DEFAULT_ENCODING) {
     1946  SWISH_WARN("%s docinfo->encoding %s != %s",
     1947  parser_data->docinfo->uri, parser_data->docinfo->encoding, SWISH_DEFAULT_ENCODING);
     1948
1946 1949  if (!xmlStrncasecmp(parser_data->docinfo->encoding, (xmlChar *)"iso-8859-1", 10)) {
1947 1950  out = swish_xmalloc(size * 2);
   …    …
1970 1973
1971 1974  if (!isolat1ToUTF8(out, &outlen, buffer, &size)) {
1972       SWISH_WARN("could not convert buf from iso-8859-1");
     1975  SWISH_WARN("could not convert buf from iso-8859-1: %s", buffer);
     1976  swish_xfree(out);
     1977  return SWISH_ENCODING_ERROR;
     1978  }
     1979  else {
     1980  SWISH_WARN("converted %s from %s to %s",
     1981  parser_data->docinfo->uri, "iso-8859-1", SWISH_DEFAULT_ENCODING);
1973 1982  }
1974 1983
libswish3/trunk/src/libswish3/tokenizer.c
r2153 r2155
 677  677  if (inside_token) {
 678  678
      679  /* edge case */
      680  if ((chr_len + token_len) > s3->analyzer->maxwordlen) {
      681  if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER)
      682  SWISH_DEBUG_MSG("token_len = %d forcing end of token: '%s'",
      683  token_len, chr);
      684  continue;
      685  }
      686
 679  687  if (SWISH_DEBUG & SWISH_DEBUG_TOKENIZER)
 680  688  SWISH_DEBUG_MSG("adding to token: '%s'", chr);
 681  689
 682       xmlStrncat(token, (const xmlChar *)chr, chr_len);
      690  memcpy(&token[token_len], chr, chr_len * sizeof(xmlChar));
      691  token[token_len + chr_len] = '\0';
 683  692  token_len += chr_len;
 684  693
   …    …
 727  736  token_len = 0;
 728  737  inside_token = 1; /* turn on flag */
 729       xmlStrncat(token, (const xmlChar *)chr, chr_len);
      738  /* edge case */
      739  if (chr_len > s3->analyzer->maxwordlen)
      740  continue;
      741
      742  memcpy(&token[0], chr, chr_len * sizeof(xmlChar));
      743  token[chr_len] = '\0';
 730  744  token_len += chr_len;
 731  745
libswish3/trunk/src/test.sh
r2116 r2155
   1       #!/bin/sh
        1  #!/bin/sh -x
   2    2
   3    3  PERL_DL_NONLAZY=1 perl "-MExtUtils::Command::MM" "-e" "test_harness(0, 't')" t/*.t
