Changeset 2107

Show
Ignore:
Timestamp:
03/31/08 14:39:58 (2 months ago)
Author:
jmruiz
Message:

Getting rid of remove_worddata_longs routine in compress.c

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • swish-e/branches/2.6/src/compress.c

    r2079 r2107  
    936936} 
    937937 
    938  
    939 /* 2002/09 jmruiz 
    940 ** This routine changes longs in worddata by shorter compressed 
    941 ** numbers. 
    942 ** 
    943 ** Here are two reasons for using compressed numbers in worddata 
    944 ** instead of longs: 
    945 **   - Compressed numbers are more portable: longs are usually 4 bytes 
    946 **     long in a 32 bit machine but in a 64 bit alpha they are 8 bytes 
    947 **     long (this is a waste of space). 
    948 **   - The obvious one is that compressed numbers use less disk space 
    949 ** 
    950 ** BTW, any change in worddata will also affect dump.c, merge.c and search.c 
    951 ** (getfileinfo routine). 
    952 ** 
    953 **  worddata has the following format before entering the routine 
    954 **  <tfreq><metaID><nextposmetaID><data><metaID><nextposmetaID><data>... 
    955 ** 
    956 **  Entering this routine nextposmetaID is the offset to next metaid 
    957 **  in bytes, counted from the beginning of worddata. 
    958 **  It is a packed long number (sizeof(long) bytes). 
    959 ** 
    960 **  Exiting this routine, nextposmetaID has changed to be the size of 
    961 **  the data block and is stored as a compressed number. 
    962 ** 
    963 **  In other words, worddata has the following format: 
    964 **  <tfreq><metaID><data_len><data><metaID><data_len><data>... 
    965 ** 
    966 */ 
    967 void    remove_worddata_longs(unsigned char *worddata,int *sz_worddata) 
    968 { 
    969     unsigned char *src,*dst;   //source and dest pointers for worddata 
    970     unsigned int metaID, tfrequency, data_len; 
    971     unsigned long nextposmetaID; 
    972  
    973     src = worddata; 
    974  
    975     /* Jump over tfrequency and get first metaID */ 
    976     tfrequency = uncompress2(&src);     /* tfrequency */ 
    977     metaID = uncompress2(&src);     /* metaID */ 
    978     dst = src; 
    979  
    980     while(1) 
    981     { 
    982         /* Get offset to next one */ 
    983         nextposmetaID = UNPACKLONG2(src); 
    984         src += sizeof(long); 
    985  
    986         /* Compute data length for this metaID */ 
    987         data_len = (int)nextposmetaID - (src - worddata); 
    988  
    989         /* Store data_len as a compressed number */ 
    990         dst = compress3(data_len,dst); 
    991  
    992         /* This must not happen. Anyway check it */ 
    993         if(dst > src) 
    994             progerr("Internal error in remove_worddata_longs"); 
    995  
    996         /* dst may be smaller than src. So move the data */ 
    997         /* valgrind complains that dst and src overlap, */ 
    998         /* which results in undefined behavior with memcpy, so use memmove. */ 
    999         /* Q: Is it correct for dst and src to overlap here? */ 
    1000         memmove(dst,src,data_len); 
    1001  
    1002         /* Increase pointers */ 
    1003         src += data_len; 
    1004         dst += data_len; 
    1005  
    1006         /* Check if we are at the end of the buffer */ 
    1007         if ((src - worddata) == *sz_worddata) 
    1008             break;   /* End of worddata */ 
    1009  
    1010         /* Get next metaID */ 
    1011         metaID = uncompress2(&src); 
    1012         dst = compress3(metaID,dst); 
    1013     } 
    1014     /* Adjust to new size */ 
    1015     *sz_worddata = dst - worddata; 
    1016 } 
  • swish-e/branches/2.6/src/compress.h

    r1736 r2107  
    6363int compress_worddata(unsigned char *, int, int ); 
    6464void uncompress_worddata(unsigned char **,int *, int); 
    65 void    remove_worddata_longs(unsigned char *,int *); 
    6665 
    6766/* Here is the worst case size for a compressed number  
  • swish-e/branches/2.6/src/db_write.c

    r1944 r2107  
    306306/* 04/2002 jmruiz 
    307307** New simpler routine to write worddata 
    308 ** 
    309 ** 10/2002 jmruiz 
    310 ** Add extra compression for worddata. Call to remove_worddata_longs 
    311308*/ 
    312309void write_worddata(SWISH * sw, ENTRY * ep, IndexFILE * indexf ) 
    313310{ 
    314311    int zlib_size; 
    315  
    316     /* Get some extra compression */ 
    317     remove_worddata_longs(sw->Index->worddata_buffer,&sw->Index->sz_worddata_buffer); 
    318312 
    319313    if(sw->compressPositions) 
  • swish-e/branches/2.6/src/dump.c

    r1944 r2107  
    190190    unsigned int       *posdata; 
    191191    int     metadata_length; 
     192    long    r_nextposmeta; 
    192193    char    word[2]; 
    193194    char   *resultword; 
     
    248249            tmpval = uncompress2(&s);     /* tfrequency */ 
    249250            metaID = uncompress2(&s);     /* metaID */ 
    250             metadata_length = uncompress2(&s); 
     251 
     252            r_nextposmeta = UNPACKLONG2(s); 
     253            s += sizeof(long); 
     254            metadata_length = (int)r_nextposmeta - (s - worddata); 
    251255 
    252256            filenum = 0; 
     
    274278                    filenum = 0; 
    275279                    metaID = uncompress2(&s); 
    276                     metadata_length = uncompress2(&s); 
     280 
     281                    r_nextposmeta = UNPACKLONG2(s); 
     282                    s += sizeof(long); 
     283                    metadata_length = (int)r_nextposmeta - (s - worddata); 
     284 
    277285                    start = s; 
    278286                } 
     
    331339            tmpval = uncompress2(&s);     /* tfrequency */ 
    332340            metaID = uncompress2(&s);     /* metaID */ 
    333             metadata_length = uncompress2(&s); 
     341 
     342            r_nextposmeta = UNPACKLONG2(s); 
     343            s += sizeof(long); 
     344            metadata_length = (int)r_nextposmeta - (s - worddata); 
     345 
    334346 
    335347            filenum = 0; 
     
    428440                    filenum = 0; 
    429441                    metaID = uncompress2(&s); 
    430                     metadata_length = uncompress2(&s); 
     442 
     443                    r_nextposmeta = UNPACKLONG2(s); 
     444                    s += sizeof(long); 
     445                    metadata_length = (int)r_nextposmeta - (s - worddata); 
     446 
    431447                    start = s; 
    432448                } 
  • swish-e/branches/2.6/src/merge.c

    r1976 r2107  
    9393    DB_WORDID   *wordID, *tmp; 
    9494    int          metadata_length = 0; 
     95    long         r_nextposmeta; 
    9596    unsigned char   *worddata; 
    9697    unsigned char   *s, *start; 
     
    230231                            if (metaID) 
    231232                            { 
    232                                 metadata_length = uncompress2(&s); 
     233                                r_nextposmeta = UNPACKLONG2(s); 
     234                                s += sizeof(long); 
     235                                metadata_length = (int)r_nextposmeta - (s - worddata); 
    233236                            } 
    234237 
     
    279282                                    filenum = 0; 
    280283                                    metaID = uncompress2(&s); 
    281                                     metadata_length = uncompress2(&s); 
     284 
     285                                    r_nextposmeta = UNPACKLONG2(s); 
     286                                    s += sizeof(long); 
     287                                    metadata_length = (int)r_nextposmeta - (s - worddata); 
     288 
    282289                                    start = s; 
    283290                                } 
  • swish-e/branches/2.6/src/search.c

    r1944 r2107  
    15091509    DB_WORDID    *wordID, *tmp; 
    15101510    int     metadata_length; 
     1511    long    r_nextposmeta; 
    15111512    char   *p; 
    15121513    int     tfrequency = 0; 
     
    17281729           while (curmetaID) 
    17291730           { 
    1730                metadata_length = uncompress2(&s); 
     1731               r_nextposmeta = UNPACKLONG2(s); 
     1732               s += sizeof(long); 
     1733 
     1734               metadata_length = (int)r_nextposmeta - (s - buffer); 
    17311735             
    17321736               if (curmetaID >= metaID)