Changeset 1944
- Timestamp:
- 10/21/07 16:57:52 (9 months ago)
- Files:
-
- swish-e/branches/2.6/src/array.c (deleted)
- swish-e/branches/2.6/src/array.h (deleted)
- swish-e/branches/2.6/src/btree.c (deleted)
- swish-e/branches/2.6/src/btree.h (deleted)
- swish-e/branches/2.6/src/compress.c (modified) (1 diff)
- swish-e/branches/2.6/src/config.h (modified) (1 diff)
- swish-e/branches/2.6/src/db_native.c (modified) (90 diffs)
- swish-e/branches/2.6/src/db_native.h (modified) (7 diffs)
- swish-e/branches/2.6/src/db_read.c (modified) (15 diffs)
- swish-e/branches/2.6/src/db_write.c (modified) (12 diffs)
- swish-e/branches/2.6/src/dump.c (modified) (8 diffs)
- swish-e/branches/2.6/src/extprog.c (modified) (2 diffs)
- swish-e/branches/2.6/src/fhash.c (deleted)
- swish-e/branches/2.6/src/fhash.h (deleted)
- swish-e/branches/2.6/src/index.c (modified) (9 diffs)
- swish-e/branches/2.6/src/list.c (modified) (1 diff)
- swish-e/branches/2.6/src/merge.c (modified) (3 diffs)
- swish-e/branches/2.6/src/proplimit.c (modified) (1 diff)
- swish-e/branches/2.6/src/ramdisk.c (deleted)
- swish-e/branches/2.6/src/ramdisk.h (deleted)
- swish-e/branches/2.6/src/result_sort.c (modified) (1 diff)
- swish-e/branches/2.6/src/search.c (modified) (15 diffs)
- swish-e/branches/2.6/src/sw_db.h (modified) (6 diffs)
- swish-e/branches/2.6/src/swish.c (modified) (6 diffs)
- swish-e/branches/2.6/src/swish.h (modified) (2 diffs)
- swish-e/branches/2.6/src/worddata.c (deleted)
- swish-e/branches/2.6/src/worddata.h (deleted)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
swish-e/branches/2.6/src/compress.c
r1736 r1944 47 47 #include "index.h" 48 48 #include "hash.h" 49 #include "ramdisk.h"50 49 #include "swish_qsort.h" 51 50 #include "file.h" swish-e/branches/2.6/src/config.h
r1939 r1944 374 374 #define SORT_FILENAMES 0 375 375 376 /* 2001/10 jmruiz -- Added BTREE schema to store words */377 378 //#define USE_BTREE /* use --enable-incremental at configure time */379 380 /* If USE_BTREE then enable the ARRAY code for the pre-sorted indexes */381 382 376 #define sw_fopen fopen 383 377 #define sw_fclose fclose swish-e/branches/2.6/src/db_native.c
r1940 r1944 42 42 #include "sw_db.h" 43 43 #include "swish_qsort.h" 44 #include "ramdisk.h"45 44 #include "db_native.h" 46 45 47 #ifdef USE_BTREE48 #define WRITE_WORDS_RAMDISK 049 #else50 #define WRITE_WORDS_RAMDISK 151 #endif52 46 53 47 /* MAX_PATH used by Herman's NEAR feature but it seems to be a Windoze thing … … 72 66 Db->DB_name = (char *) estrdup("native"); 73 67 74 Db->DB_Create = DB_Create_Native;75 Db->DB_Open = DB_Open_Native;76 Db->DB_Close = DB_Close_Native;77 Db->DB_Remove = DB_Remove_Native;78 79 Db->DB_InitWriteHeader = DB_InitWriteHeader_Native;80 Db->DB_WriteHeaderData = DB_WriteHeaderData_Native;81 Db->DB_EndWriteHeader = DB_EndWriteHeader_Native;82 83 Db->DB_InitReadHeader = DB_InitReadHeader_Native;84 Db->DB_ReadHeaderData = DB_ReadHeaderData_Native;85 Db->DB_EndReadHeader = DB_EndReadHeader_Native;86 87 Db->DB_InitWriteWords = DB_InitWriteWords_Native;88 Db->DB_GetWordID = DB_GetWordID_Native;89 Db->DB_WriteWord = DB_WriteWord_Native;90 91 #ifndef USE_BTREE92 Db->DB_WriteWordHash = DB_WriteWordHash_Native;93 #else94 Db->DB_UpdateWordID = DB_UpdateWordID_Native;95 Db->DB_DeleteWordData = DB_DeleteWordData_Native;96 #endif97 98 Db->DB_WriteWordData = DB_WriteWordData_Native;99 Db->DB_EndWriteWords = DB_EndWriteWords_Native;100 101 Db->DB_InitReadWords = DB_InitReadWords_Native;102 Db->DB_ReadWordHash = DB_ReadWordHash_Native;103 Db->DB_ReadFirstWordInvertedIndex = DB_ReadFirstWordInvertedIndex_Native;104 Db->DB_ReadNextWordInvertedIndex = DB_ReadNextWordInvertedIndex_Native;105 Db->DB_ReadWordData = DB_ReadWordData_Native;106 Db->DB_EndReadWords = DB_EndReadWords_Native;107 108 Db->DB_WriteFileNum = DB_WriteFileNum_Native;109 Db->DB_ReadFileNum = DB_ReadFileNum_Native;110 Db->DB_CheckFileNum = DB_CheckFileNum_Native;111 Db->DB_RemoveFileNum = DB_RemoveFileNum_Native;112 113 Db->DB_InitWriteSortedIndex = DB_InitWriteSortedIndex_Native;114 Db->DB_WriteSortedIndex = DB_WriteSortedIndex_Native;115 Db->DB_EndWriteSortedIndex = DB_EndWriteSortedIndex_Native;116 117 Db->DB_InitReadSortedIndex = DB_InitReadSortedIndex_Native;118 Db->DB_ReadSortedIndex = DB_ReadSortedIndex_Native;119 Db->DB_ReadSortedData = DB_ReadSortedData_Native;120 Db->DB_EndReadSortedIndex = DB_EndReadSortedIndex_Native;121 122 Db->DB_InitWriteProperties = DB_InitWriteProperties_Native;123 Db->DB_WriteProperty = DB_WriteProperty_Native;124 Db->DB_WritePropPositions = DB_WritePropPositions_Native;125 Db->DB_ReadProperty = DB_ReadProperty_Native;126 Db->DB_ReadPropPositions = DB_ReadPropPositions_Native;127 Db->DB_Reopen_PropertiesForRead = DB_Reopen_PropertiesForRead_Native;128 129 #ifdef USE_BTREE130 Db->DB_WriteTotalWordsPerFile = DB_WriteTotalWordsPerFile_Native;131 Db->DB_ReadTotalWordsPerFile = DB_ReadTotalWordsPerFile_Native;132 #endif133 134 68 sw->Db = Db; 135 69 … … 161 95 */ 162 96 163 static void DB_CheckHeader(struct Handle_DBNative *SW_DB) 164 { 165 #ifndef USE_BTREE 166 long swish_magic; 167 long prop; 168 169 sw_fseek(SW_DB->fp, (sw_off_t)0, SEEK_SET); 170 swish_magic = readlong(SW_DB->fp, sw_fread); 171 172 if (swish_magic != SWISH_MAGIC) 173 { 174 set_progerr(INDEX_FILE_ERROR, SW_DB->sw, "File \"%s\" has an unknown format.", SW_DB->cur_index_file); 175 return; 176 } 177 178 SW_DB->unique_ID = readlong(SW_DB->fp, sw_fread); 179 prop = readlong(SW_DB->fp_prop, sw_fread); 180 181 if (SW_DB->unique_ID != prop) 182 { 183 set_progerr(INDEX_FILE_ERROR, SW_DB->sw, "Index file '%s' and property file '%s' are not related.", SW_DB->cur_index_file, SW_DB->cur_prop_file); 184 return; 185 } 186 #else 97 static void _DB_CheckHeader(struct Handle_DBNative *SW_DB) 98 { 187 99 long propindex, totwords, presorted, header; 188 100 SW_DB->unique_ID = readlong(SW_DB->fp_prop, sw_fread); … … 217 129 return; 218 130 } 219 #endif220 131 } 221 132 … … 230 141 SW_DB->sw = sw; /* for error messages */ 231 142 232 if (WRITE_WORDS_RAMDISK) 233 { 234 SW_DB->w_tell = ramdisk_tell; 235 SW_DB->w_write = ramdisk_write; 236 SW_DB->w_seek = ramdisk_seek; 237 SW_DB->w_read = ramdisk_read; 238 SW_DB->w_close = ramdisk_close; 239 SW_DB->w_putc = ramdisk_putc; 240 SW_DB->w_getc = ramdisk_getc; 241 } 242 else 243 { 244 SW_DB->w_tell = sw_ftell; 245 SW_DB->w_write = sw_fwrite; 246 SW_DB->w_seek = sw_fseek; 247 SW_DB->w_read = sw_fread; 248 SW_DB->w_close = sw_fclose; 249 SW_DB->w_putc = sw_fputc; 250 SW_DB->w_getc = sw_fgetc; 251 } 143 SW_DB->w_tell = sw_ftell; 144 SW_DB->w_write = sw_fwrite; 145 SW_DB->w_seek = sw_fseek; 146 SW_DB->w_read = sw_fread; 147 SW_DB->w_close = sw_fclose; 148 SW_DB->w_putc = sw_fputc; 149 SW_DB->w_getc = sw_fgetc; 252 150 253 151 SW_DB->dbname = estrdup(dbname); … … 296 194 } 297 195 298 #ifdef USE_BTREE 299 DB * OpenBerkeleyFile(char *filename, DBTYPE db_type, u_int32_t db_flags) 196 /* Routine to compare packed longs - required to get BTREE duplicate entries 197 ** sorted by wordID. wordID is a packed long 198 */ 199 int compare_packed_long(DB *dbp, const DBT *a, const DBT *b) 200 { 201 return memcmp(a->data, b->data, sizeof(long)); 202 } 203 204 205 DB * OpenBerkeleyFile(char *filename, DBTYPE db_type, u_int32_t db_flags, int dup) 300 206 { 301 207 DB *dbp; … … 303 209 if((db_ret = db_create(&dbp, NULL, 0))) 304 210 progerrno("Couldn't create BERKELEY DB resource"); 211 if(dup) 212 { 213 if((db_ret = dbp->set_flags(dbp, DB_DUPSORT))) 214 progerrno("Couldn't set DB_DUPSORT in DB Berkeley file \"%s\": ", filename); 215 if((db_ret = dbp->set_dup_compare(dbp,compare_packed_long))) 216 progerrno("Couldn't set DB_DUPSORT_ROUTINE in DB Berkeley file \"%s\": ", filename); 217 } 305 218 if((db_ret = dbp->open(dbp,NULL,filename,NULL,db_type,db_flags,0))) 306 219 { … … 312 225 } 313 226 314 DB * CreateBerkeleyFile(char *filename,DBTYPE db_type )315 { 316 return OpenBerkeleyFile(filename, db_type, DB_CREATE | DB_TRUNCATE );227 DB * CreateBerkeleyFile(char *filename,DBTYPE db_type, int dup) 228 { 229 return OpenBerkeleyFile(filename, db_type, DB_CREATE | DB_TRUNCATE, dup); 317 230 } 318 231 … … 346 259 } 347 260 } 348 #endif 349 350 /**********************/ 351 352 353 354 void *DB_Create_Native(SWISH *sw, char *dbname) 261 262 263 void *_DB_Create(SWISH *sw, char *dbname) 355 264 { 356 265 long swish_magic; 357 266 char *filename; 358 #ifdef USE_BTREE359 267 FILE *fp_tmp; 360 #else361 int i;362 #endif363 268 struct Handle_DBNative *SW_DB; 364 269 … … 384 289 385 290 386 /* Create index File */387 #ifndef USE_BTREE388 CreateEmptyFile(filename);389 if (!(SW_DB->fp = openIndexFILEForReadAndWrite(filename)))390 progerrno("Couldn't create the index file \"%s\": ", filename);391 392 SW_DB->cur_index_file = estrdup(filename);393 printlong(SW_DB->fp, swish_magic, sw_fwrite);394 printlong(SW_DB->fp, SW_DB->unique_ID, sw_fwrite);395 #endif396 397 291 /* Create property File */ 398 292 strcpy(filename, dbname); … … 411 305 printlong(SW_DB->fp_prop, SW_DB->unique_ID, sw_fwrite); 412 306 413 414 #ifdef USE_BTREE 415 /* Create Btree File */ 307 /* Create Btree Index File */ 416 308 strcpy(filename, dbname); 417 309 strcat(filename, BTREE_EXTENSION); … … 420 312 SW_DB->tmp_btree = 1; 421 313 #endif 422 SW_DB->db_btree = CreateBerkeleyFile(filename,DB_BTREE); 314 SW_DB->db_btree = CreateBerkeleyFile(filename,DB_BTREE,1); 315 /* Allow sorted duplicate items */ 423 316 SW_DB->cur_btree_file = estrdup(filename); 424 317 … … 431 324 SW_DB->tmp_worddata = 1; 432 325 #endif 433 SW_DB->db_worddata = CreateBerkeleyFile(filename,DB_RECNO );326 SW_DB->db_worddata = CreateBerkeleyFile(filename,DB_RECNO,0); 434 327 SW_DB->cur_worddata_file = estrdup(filename); 435 328 … … 501 394 SW_DB->tmp_hashfile = 1; 502 395 #endif 503 SW_DB->db_hashfile = CreateBerkeleyFile(filename,DB_HASH );396 SW_DB->db_hashfile = CreateBerkeleyFile(filename,DB_HASH,0); 504 397 SW_DB->cur_hashfile_file = estrdup(filename); 505 398 506 507 #endif508 509 399 efree(filename); 510 400 511 #ifndef USE_BTREE512 for (i = 0; i < MAXCHARS; i++)513 SW_DB->offsets[i] = (sw_off_t)0;514 for (i = 0; i < VERYBIGHASHSIZE; i++)515 SW_DB->hashoffsets[i] = (sw_off_t)0;516 for (i = 0; i < VERYBIGHASHSIZE; i++)517 SW_DB->lasthashval[i] = (sw_off_t)0;518 519 /* Reserve space for offset pointers */520 SW_DB->offsetstart = sw_ftell(SW_DB->fp);521 for (i = 0; i < MAXCHARS; i++)522 printfileoffset(SW_DB->fp, (sw_off_t) 0, sw_fwrite);523 524 SW_DB->hashstart = sw_ftell(SW_DB->fp);525 for (i = 0; i < VERYBIGHASHSIZE; i++)526 printfileoffset(SW_DB->fp, (sw_off_t) 0, sw_fwrite);527 #endif528 529 401 return (void *) SW_DB; 530 402 } … … 532 404 533 405 /******************************************************************* 534 * DB_Open_Native406 * _DB_Open 535 407 * 536 408 *******************************************************************/ 537 409 538 void * DB_Open_Native(SWISH *sw, char *dbname,int mode)410 void *_DB_Open(SWISH *sw, char *dbname,int mode) 539 411 { 540 412 struct Handle_DBNative *SW_DB; 541 413 FILE *(*openRoutine)(char *) = NULL; 542 414 char *s; 543 #ifdef USE_BTREE544 415 u_int32_t db_flags; 545 #else546 int i;547 #endif548 416 549 417 switch(mode) … … 551 419 case DB_READ: 552 420 openRoutine = openIndexFILEForRead; 553 #ifdef USE_BTREE554 421 db_flags = DB_RDONLY; 555 #endif556 422 break; 557 423 case DB_READWRITE: 558 424 openRoutine = openIndexFILEForReadAndWrite; 559 #ifdef USE_BTREE560 425 db_flags = 0; 561 #endif562 426 break; 563 427 default: 564 428 openRoutine = openIndexFILEForRead; 565 #ifdef USE_BTREE566 429 db_flags = DB_RDONLY; 567 #endif568 430 } 569 431 … … 571 433 SW_DB->mode = mode; 572 434 573 #ifndef USE_BTREE574 /* Open index File */575 if (!(SW_DB->fp = openRoutine(dbname)))576 {577 set_progerrno(INDEX_FILE_ERROR, SW_DB->sw, "Could not open the index file '%s': ", dbname);578 return (void *) SW_DB;579 }580 581 SW_DB->cur_index_file = estrdup(dbname);582 #endif583 584 435 s = emalloc(strlen(dbname) + strlen(PROPFILE_EXTENSION) + 1); 585 436 … … 594 445 595 446 SW_DB->cur_prop_file = s; 596 597 #ifdef USE_BTREE598 447 599 448 s = emalloc(strlen(dbname) + strlen(BTREE_EXTENSION) + 1); 600 449 strcpy(s, dbname); 601 450 strcat(s, BTREE_EXTENSION); 602 SW_DB->db_btree = OpenBerkeleyFile(s, DB_BTREE, db_flags );451 SW_DB->db_btree = OpenBerkeleyFile(s, DB_BTREE, db_flags, 1); 603 452 604 453 SW_DB->cur_btree_file = s; … … 659 508 strcpy(s, dbname); 660 509 strcat(s, WORDDATA_EXTENSION); 661 SW_DB->db_worddata = OpenBerkeleyFile(s, DB_RECNO, db_flags );510 SW_DB->db_worddata = OpenBerkeleyFile(s, DB_RECNO, db_flags, 0); 662 511 SW_DB->cur_worddata_file = s; 663 512 … … 665 514 strcpy(s, dbname); 666 515 strcat(s, HASHFILE_EXTENSION); 667 SW_DB->db_hashfile = OpenBerkeleyFile(s, DB_HASH, db_flags );516 SW_DB->db_hashfile = OpenBerkeleyFile(s, DB_HASH, db_flags, 0); 668 517 SW_DB->cur_hashfile_file = s; 669 518 670 671 #endif672 673 519 /* Validate index files */ 674 DB_CheckHeader(SW_DB);520 _DB_CheckHeader(SW_DB); 675 521 if ( SW_DB->sw->lasterror ) 676 522 return (void *) SW_DB; 677 678 #ifndef USE_BTREE679 /* Read offsets lookuptable */680 SW_DB->offsetstart = sw_ftell(SW_DB->fp);681 for (i = 0; i < MAXCHARS; i++)682 SW_DB->offsets[i] = readfileoffset(SW_DB->fp, sw_fread);683 684 /* Read hashoffsets lookuptable */685 SW_DB->hashstart = sw_ftell(SW_DB->fp);686 for (i = 0; i < VERYBIGHASHSIZE; i++)687 SW_DB->hashoffsets[i] = readfileoffset(SW_DB->fp, sw_fread);688 #else689 523 690 524 /* Put the file pointer of props, propindex and totwords files … … 696 530 sw_fseek(SW_DB->fp_propindex,(sw_off_t)0,SEEK_END); 697 531 sw_fseek(SW_DB->fp_totwords,(sw_off_t)0,SEEK_END); 698 #endif699 532 700 533 return (void *) SW_DB; … … 708 541 *****************************************************************/ 709 542 710 static void DB_Close_File_Native(FILE ** fp, char **filename, int *tempflag)543 static void _DB_Close_File(FILE ** fp, char **filename, int *tempflag) 711 544 { 712 545 #if defined(_WIN32) && !defined(__CYGWIN__) … … 763 596 764 597 765 void DB_Close_Native(void *db)598 void _DB_Close(void *db) 766 599 { 767 600 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 768 #ifndef USE_BTREE769 int i;770 FILE *fp = SW_DB->fp;771 #endif772 773 601 774 602 /* Close (and rename) property file, if it's open */ 775 DB_Close_File_Native(&SW_DB->fp_prop, &SW_DB->cur_prop_file, &SW_DB->tmp_prop); 776 777 #ifndef USE_BTREE 778 779 if (SW_DB->mode == DB_CREATE || SW_DB->mode == DB_READWRITE) /* If we are indexing update offsets to words and files */ 780 { 781 /* Update internal pointers */ 782 783 sw_fseek(fp, SW_DB->offsetstart, SEEK_SET); 784 for (i = 0; i < MAXCHARS; i++) 785 printfileoffset(fp, SW_DB->offsets[i], sw_fwrite); 786 787 sw_fseek(fp, SW_DB->hashstart, SEEK_SET); 788 for (i = 0; i < VERYBIGHASHSIZE; i++) 789 printfileoffset(fp, SW_DB->hashoffsets[i], sw_fwrite); 790 } 791 792 /* Close (and rename) the index file */ 793 DB_Close_File_Native(&SW_DB->fp, &SW_DB->cur_index_file, &SW_DB->tmp_index); 794 795 #else 603 _DB_Close_File(&SW_DB->fp_prop, &SW_DB->cur_prop_file, &SW_DB->tmp_prop); 796 604 797 605 /* Close (and rename) worddata file, if it's open */ … … 805 613 { 806 614 /* Close (and rename) property file, if it's open */ 807 DB_Close_File_Native(&SW_DB->fp_propindex, &SW_DB->cur_propindex_file, &SW_DB->tmp_propindex);615 _DB_Close_File(&SW_DB->fp_propindex, &SW_DB->cur_propindex_file, &SW_DB->tmp_propindex); 808 616 } 809 617 /* Close (and rename) totwords file, if it's open */ … … 811 619 { 812 620 /* Close (and rename) totwords file, if it's open */ 813 DB_Close_File_Native(&SW_DB->fp_totwords, &SW_DB->cur_totwords_file, &SW_DB->tmp_totwords);621 _DB_Close_File(&SW_DB->fp_totwords, &SW_DB->cur_totwords_file, &SW_DB->tmp_totwords); 814 622 } 815 623 /* Close (and rename) presorted index file, if it's open */ 816 624 if(SW_DB->fp_presorted) 817 625 { 818 DB_Close_File_Native(&SW_DB->fp_presorted, &SW_DB->cur_presorted_file, &SW_DB->tmp_presorted);626 _DB_Close_File(&SW_DB->fp_presorted, &SW_DB->cur_presorted_file, &SW_DB->tmp_presorted); 819 627 } 820 628 /* Close (and rename) header index file, if it's open */ 821 629 if(SW_DB->fp_header) 822 630 { 823 DB_Close_File_Native(&SW_DB->fp_header, &SW_DB->cur_header_file, &SW_DB->tmp_header);631 _DB_Close_File(&SW_DB->fp_header, &SW_DB->cur_header_file, &SW_DB->tmp_header); 824 632 } 825 633 826 634 /* Close (and rename) hash-file index file, if it's open */ 827 635 CloseBerkeleyFile(&SW_DB->db_hashfile, &SW_DB->cur_hashfile_file, &SW_DB->tmp_hashfile); 828 #endif829 830 636 831 637 if (SW_DB->dbname) … … 834 640 } 835 641 836 void DB_Remove_Native(void *db)642 void _DB_Remove(void *db) 837 643 { 838 644 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; … … 856 662 /*--------------------------------------------*/ 857 663 858 int DB_InitWriteHeader_Native(void *db) 859 { 860 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 861 #ifndef USE_BTREE 862 /* The index file is being created. So put the header in the 863 ** current file position (coincides with the end of the file 864 */ 865 SW_DB->offsets[HEADERPOS] = sw_ftell(SW_DB->fp); 866 #else 867 long swish_magic; 868 /* Jump over swish_magic ID (long number) */ 869 sw_fseek(SW_DB->fp_header, (sw_off_t)0, SEEK_SET); 870 swish_magic = readlong(SW_DB->fp_header, sw_fread); 871 #endif 872 873 return 0; 874 } 875 876 877 int DB_EndWriteHeader_Native(void *db) 878 { 879 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 880 #ifndef USE_BTREE 881 FILE *fp = SW_DB->fp; 882 #else 883 FILE *fp = SW_DB->fp_header; 884 #endif 664 int _DB_InitWriteHeader(void *db) 665 { 666 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 667 long swish_magic; 668 669 /* Jump over swish_magic ID (long number) */ 670 sw_fseek(SW_DB->fp_header, (sw_off_t)0, SEEK_SET); 671 swish_magic = readlong(SW_DB->fp_header, sw_fread); 672 673 return 0; 674 } 675 676 677 int _DB_EndWriteHeader(void *db) 678 { 679 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 680 FILE *fp = SW_DB->fp_header; 885 681 886 682 /* End of header delimiter */ … … 891 687 } 892 688 893 int DB_WriteHeaderData_Native(int id, unsigned char *s, int len, void *db) 894 { 895 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 896 #ifndef USE_BTREE 897 FILE *fp = SW_DB->fp; 898 #else 899 FILE *fp = SW_DB->fp_header; 900 #endif 689 int _DB_WriteHeaderData(int id, unsigned char *s, int len, void *db) 690 { 691 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 692 FILE *fp = SW_DB->fp_header; 901 693 902 694 compress1(id, fp, sw_fputc); … … 909 701 910 702 911 int DB_InitReadHeader_Native(void *db) 912 { 913 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 914 #ifndef USE_BTREE 915 sw_fseek(SW_DB->fp, SW_DB->offsets[HEADERPOS], SEEK_SET); 916 #else 917 long swish_magic; 918 /* Jump over swish_magic ID (long number) */ 919 sw_fseek(SW_DB->fp_header, (sw_off_t)0, SEEK_SET); 920 swish_magic = readlong(SW_DB->fp_header, sw_fread); 921 #endif 922 return 0; 923 } 924 925 int DB_ReadHeaderData_Native(int *id, unsigned char **s, int *len, void *db) 926 { 927 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 928 #ifndef USE_BTREE 929 FILE *fp = SW_DB->fp; 930 #else 931 FILE *fp = SW_DB->fp_header; 932 #endif 933 int tmp; 703 int _DB_InitReadHeader(void *db) 704 { 705 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 706 long swish_magic; 707 708 /* Jump over swish_magic ID (long number) */ 709 sw_fseek(SW_DB->fp_header, (sw_off_t)0, SEEK_SET); 710 swish_magic = readlong(SW_DB->fp_header, sw_fread); 711 712 return 0; 713 } 714 715 int _DB_ReadHeaderData(int *id, unsigned char **s, int *len, void *db) 716 { 717 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 718 FILE *fp = SW_DB->fp_header; 719 int tmp; 934 720 935 721 tmp = uncompress1(fp, sw_fgetc); … … 951 737 } 952 738 953 int DB_EndReadHeader_Native(void *db)739 int _DB_EndReadHeader(void *db) 954 740 { 955 741 return 0; … … 962 748 /*--------------------------------------------*/ 963 749 964 int DB_InitWriteWords_Native(void *db) 965 { 966 967 #ifndef USE_BTREE 968 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 969 SW_DB->offsets[WORDPOS] = sw_ftell(SW_DB->fp); 970 #endif 971 750 int _DB_InitWriteWords(void *db) 751 { 972 752 return 0; 973 753 } … … 984 764 } 985 765 986 int DB_EndWriteWords_Native(void *db) 987 { 988 #ifndef USE_BTREE 766 int _DB_EndWriteWords(void *db) 767 { 768 return 0; 769 } 770 771 sw_off_t _DB_GetWordID(void *db) 772 { 989 773 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 990 FILE *fp = (FILE *) SW_DB->fp;991 int i,992 wordlen;993 sw_off_t wordID, word_pos;994 sw_off_t f_hash_offset, f_offset;995 996 /* Free hash zone */997 Mem_ZoneFree(&SW_DB->hashzone);998 999 /* Now update word's data offset into the list of words */1000 /* Simple check words and worddata must match */1001 1002 if (! SW_DB->num_words)1003 progerr("No unique words indexed");1004 1005 if (SW_DB->num_words != SW_DB->wordhash_counter)1006 progerrno("Internal DB_native error - SW_DB->num_words != SW_DB->wordhash_counter: ");1007 1008 if (SW_DB->num_words != SW_DB->worddata_counter)1009 progerrno("Internal DB_native error - SW_DB->num_words != SW_DB->worddata_counter: ");1010 1011 /* Sort wordhashdata to be written to allow sequential writes */1012 swish_qsort(SW_DB->wordhashdata, SW_DB->num_words, 3 * sizeof(sw_off_t), cmp_wordhashdata);1013 1014 if (WRITE_WORDS_RAMDISK)1015 {1016 fp = (FILE *) SW_DB->rd;1017 }1018 for (i = 0; i < SW_DB->num_words; i++)1019 {1020 wordID = SW_DB->wordhashdata[3 * i];1021 f_hash_offset = SW_DB->wordhashdata[3 * i + 1];1022 f_offset = SW_DB->wordhashdata[3 * i + 2];1023 1024 word_pos = wordID;1025 if (WRITE_WORDS_RAMDISK)1026 {1027 word_pos -= SW_DB->offsets[WORDPOS];1028 }1029 /* Position file pointer in word */1030 SW_DB->w_seek(fp, word_pos, SEEK_SET);1031 /* Jump over word length and word */1032 wordlen = uncompress1(fp, SW_DB->w_getc); /* Get Word length */1033 SW_DB->w_seek(fp, (sw_off_t) wordlen, SEEK_CUR); /* Jump Word */1034 /* Write offset to next chain */1035 printfileoffset(fp, f_hash_offset, SW_DB->w_write);1036 /* Write offset to word data */1037 printfileoffset(fp, f_offset, SW_DB->w_write);1038 }1039 1040 efree(SW_DB->wordhashdata);1041 SW_DB->wordhashdata = NULL;1042 SW_DB->worddata_counter = 0;1043 SW_DB->wordhash_counter = 0;1044 1045 if (WRITE_WORDS_RAMDISK)1046 {1047 unsigned char buffer[4096];1048 sw_off_t ramdisk_size;1049 long read = 0;1050 1051 ramdisk_seek((FILE *) SW_DB->rd, (sw_off_t)0, SEEK_END);1052 ramdisk_size = ramdisk_tell((FILE *) SW_DB->rd);1053 /* Write ramdisk to fp end free it */1054 sw_fseek((FILE *) SW_DB->fp, SW_DB->offsets[WORDPOS], SEEK_SET);1055 ramdisk_seek((FILE *) SW_DB->rd, (sw_off_t)0, SEEK_SET);1056 while (ramdisk_size)1057 {1058 read = ramdisk_read(buffer, 4096, 1, (FILE *) SW_DB->rd);1059 if ( sw_fwrite(buffer, read, 1, SW_DB->fp) != 1 )1060 progerrno("Error while flushing ramdisk to disk:");1061 1062 ramdisk_size -= (sw_off_t)read;1063 }1064 ramdisk_close((FILE *) SW_DB->rd);1065 }1066 /* Get last word file offset - For the last word, this will be1067 ** used to delimite the last word in the index file1068 ** In other words. This is the file offset where no more words1069 ** are added.1070 */1071 SW_DB->offsets[ENDWORDPOS] = sw_ftell(SW_DB->fp);1072 1073 /* Restore file pointer at the end of file */1074 sw_fseek(SW_DB->fp, (sw_off_t)0, SEEK_END);1075 if ( sw_fputc(0, SW_DB->fp) == EOF ) /* End of words mark */1076 progerrno("sw_fputc() failed writing null: ");1077 1078 #endif1079 1080 return 0;1081 }1082 1083 #ifndef USE_BTREE1084 sw_off_t DB_GetWordID_Native(void *db)1085 {1086 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db;1087 FILE *fp = SW_DB->fp;1088 sw_off_t pos = (sw_off_t)0;1089 1090 if (WRITE_WORDS_RAMDISK)1091 {1092 if (!SW_DB->rd)1093 {1094 /* ramdisk size as suggested by Bill Meier */1095 SW_DB->rd = ramdisk_create("RAM Disk: write words", 32 * 4096);1096 }1097 pos = SW_DB->offsets[WORDPOS];1098 fp = (FILE *) SW_DB->rd;1099 }1100 pos += SW_DB->w_tell(fp);1101 1102 return pos; /* Native database uses position as a Word ID */1103 }1104 1105 int DB_WriteWord_Native(char *word, sw_off_t wordID, void *db)1106 {1107 int i,1108 wordlen;1109 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db;1110 1111 FILE *fp = SW_DB->fp;1112 1113 i = (int) ((unsigned char) word[0]);1114 1115 if (!SW_DB->offsets[i])1116 SW_DB->offsets[i] = wordID;1117 1118 1119 /* Write word length, word and a NULL offset */1120 wordlen = strlen(word);1121 1122 if (WRITE_WORDS_RAMDISK)1123 {1124 fp = (FILE *) SW_DB->rd;1125 }1126 compress1(wordlen, fp, SW_DB->w_putc);1127 SW_DB->w_write(word, wordlen, sizeof(char), fp);1128 1129 printfileoffset(fp, (sw_off_t) 0, SW_DB->w_write); /* hash chain */1130 printfileoffset(fp, (sw_off_t) 0, SW_DB->w_write); /* word's data pointer */1131 1132 SW_DB->num_words++;1133 1134 return 0;1135 }1136 1137 int offsethash(sw_off_t offset)1138 {1139 return (int)(offset % (sw_off_t) BIGHASHSIZE);1140 }1141 1142 long DB_WriteWordData_Native(sw_off_t wordID, unsigned char *worddata, int data_size, int saved_bytes, void *db)1143 {1144 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db;1145 FILE *fp = SW_DB->fp;1146 struct numhash *numhash;1147 int numhashval;1148 1149 /* We must be at the end of the file */1150 1151 if (!SW_DB->worddata_counter)1152 {1153 /* We are starting writing worddata */1154 /* If inside a ramdisk we must preserve its space */1155 if (WRITE_WORDS_RAMDISK)1156 {1157 sw_off_t ramdisk_size;1158 1159 ramdisk_seek((FILE *) SW_DB->rd, (sw_off_t)0, SEEK_END);1160 ramdisk_size = ramdisk_tell((FILE *) SW_DB->rd);1161 /* Preserve ramdisk size in DB file */1162 /* it will be written later */1163 sw_fseek((FILE *) SW_DB->fp, ramdisk_size, SEEK_END);1164 }1165 }1166 /* Search for word's ID */1167 numhashval = offsethash(wordID);1168 for (numhash = SW_DB->hash[numhashval]; numhash; numhash = numhash->next)1169 if (SW_DB->wordhashdata[3 * numhash->index] == wordID)1170 break;1171 if (!numhash)1172 progerrno("Internal db_native.c error in DB_WriteWordData_Native: ");1173 SW_DB->wordhashdata[3 * numhash->index + 2] = sw_ftell(fp);1174 1175 SW_DB->worddata_counter++;1176 1177 /* Write the worddata to disk */1178 /* Write in the form: <data_size><saved_bytes><worddata> */1179 /* If there is not any compression then saved_bytes is 0 */1180 compress1(data_size, fp, sw_fputc);1181 compress1(saved_bytes, fp, sw_fputc);1182 if ( sw_fwrite(worddata, data_size, 1, fp) != 1 )1183 progerrno("Error writing to device while trying to write %d bytes: ", data_size );1184 1185 1186 /* A NULL byte to indicate end of word data */1187 if ( sw_fputc(0, fp) == EOF )1188 progerrno( "sw_fputc() returned error writing null: ");1189 1190 1191 1192 return 0;1193 }1194 1195 #else1196 1197 sw_off_t DB_GetWordID_Native(void *db)1198 {1199 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db;1200 774 1201 775 return (sw_off_t) SW_DB->worddata_counter; 1202 776 } 1203 777 1204 int DB_WriteWord_Native(char *word, sw_off_t wordID, void *db)778 int _DB_WriteWord(char *word, sw_off_t wordID, void *db) 1205 779 { 1206 780 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; … … 1218 792 data.size = sizeof(wordID); 1219 793 1220 ret = SW_DB->db_btree->put(SW_DB->db_btree,NULL,&key,&data, DB_NOOVERWRITE);794 ret = SW_DB->db_btree->put(SW_DB->db_btree,NULL,&key,&data,0); 1221 795 1222 796 if(ret != 0) … … 1229 803 } 1230 804 1231 int DB_UpdateWordID_Native(char *word, sw_off_t new_wordID, void *db) 1232 { 1233 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 1234 1235 DBT key,data; 1236 1237 /*Berkeley DB stuff */ 1238 memset(&key,0,sizeof(DBT)); 1239 memset(&data,0,sizeof(DBT)); 1240 key.data = word; 1241 key.size = strlen(word); 1242 data.data = (void *)&new_wordID; 1243 data.size = sizeof(new_wordID); 1244 SW_DB->db_btree->del(SW_DB->db_btree,NULL,&key,0); 1245 SW_DB->db_btree->put(SW_DB->db_btree,NULL,&key,&data,DB_NOOVERWRITE); 1246 1247 return 0; 1248 } 1249 1250 int DB_DeleteWordData_Native(sw_off_t wordID, void *db) 1251 { 1252 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 1253 1254 DBT key; 1255 1256 /*Berkeley DB stuff */ 1257 memset(&key,0,sizeof(DBT)); 1258 key.data = (void *)&wordID; 1259 key.size = sizeof(wordID); 1260 SW_DB->db_worddata->del(SW_DB->db_worddata,NULL,&key,0); 1261 1262 return 0; 1263 } 1264 1265 long DB_WriteWordData_Native(sw_off_t wordID, unsigned char *worddata, int data_size, int saved_bytes, void *db) 805 long _DB_WriteWordData(sw_off_t wordID, unsigned char *worddata, int data_size, int saved_bytes, void *db) 1266 806 { 1267 807 unsigned char stack_buffer[8192]; /* just to avoid emalloc,efree overhead */ … … 1270 810 DBT key,data; 1271 811 db_recno_t recno; 812 int ret; 1272 813 1273 814 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; … … 1285 826 /* Put bytes worddata buf */ 1286 827 memcpy(p,worddata,data_size); 1287 1288 SW_DB->worddata_counter++;1289 828 1290 829 /* Write the worddata to disk */ … … 1296 835 data.data = buf; 1297 836 data.size = buf_size; 1298 SW_DB->db_worddata->put(SW_DB->db_worddata,NULL,&key,&data,DB_APPEND); 1299 recno = *(db_recno_t *) key.data; 837 ret = SW_DB->db_worddata->put(SW_DB->db_worddata,NULL,&key,&data,DB_APPEND); 838 839 if(ret == 0) 840 { 841 recno = *(db_recno_t *) key.data; 842 SW_DB->worddata_counter = recno; 843 } 844 else 845 { 846 printf("ERROR %d inserting worddata in Berkeley DB BTREE\n",ret); 847 } 1300 848 1301 849 if(buf != stack_buffer) … … 1304 852 } 1305 853 1306 #endif 1307 1308 #ifndef USE_BTREE 1309 int DB_WriteWordHash_Native(char *word, sw_off_t wordID, void *db) 1310 { 1311 int i, 1312 hashval, 1313 numhashval; 1314 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 1315 struct numhash *numhash; 1316 1317 if (!SW_DB->wordhash_counter) 1318 { 1319 /* Init hash array */ 1320 for (i = 0; i < BIGHASHSIZE; i++) 1321 SW_DB->hash[i] = NULL; 1322 SW_DB->hashzone = Mem_ZoneCreate("WriteWordHash", SW_DB->num_words * sizeof(struct numhash), 0); 1323 1324 /* If we are here we have finished WriteWord_Native */ 1325 /* If using ramdisk - Reserve space upto the size of the ramdisk */ 1326 if (WRITE_WORDS_RAMDISK) 1327 { 1328 sw_off_t ram_size = (sw_off_t) (SW_DB->w_seek((FILE *) SW_DB->rd, 0, SEEK_END)); 1329 1330 sw_fseek(SW_DB->fp, ram_size, SEEK_SET); 1331 } 1332 1333 SW_DB->wordhashdata = emalloc(3 * SW_DB->num_words * sizeof(sw_off_t)); 1334 } 1335 1336 hashval = verybighash(word); 1337 1338 if (!SW_DB->hashoffsets[hashval]) 1339 { 1340 SW_DB->hashoffsets[hashval] = wordID; 1341 } 1342 1343 SW_DB->wordhashdata[3 * SW_DB->wordhash_counter] = wordID; 1344 SW_DB->wordhashdata[3 * SW_DB->wordhash_counter + 1] = (sw_off_t) 0; 1345 1346 1347 /* Add to the hash */ 1348 numhash = (struct numhash *) Mem_ZoneAlloc(SW_DB->hashzone, sizeof(struct numhash)); 1349 1350 numhashval = offsethash(wordID); 1351 numhash->index = SW_DB->wordhash_counter; 1352 numhash->next = SW_DB->hash[numhashval]; 1353 SW_DB->hash[numhashval] = numhash; 1354 1355 SW_DB->wordhash_counter++; 1356 1357 /* Update previous word in hashlist */ 1358 if (SW_DB->lasthashval[hashval]) 1359 { 1360 /* Search for SW_DB->lasthashval[hashval] */ 1361 numhashval = offsethash(SW_DB->lasthashval[hashval]); 1362 for (numhash = SW_DB->hash[numhashval]; numhash; numhash = numhash->next) 1363 if (SW_DB->wordhashdata[3 * numhash->index] == SW_DB->lasthashval[hashval]) 1364 break; 1365 if (!numhash) 1366 progerrno("Internal db_native.c error in DB_WriteWordHash_Native: "); 1367 SW_DB->wordhashdata[3 * numhash->index + 1] = wordID; 1368 } 1369 SW_DB->lasthashval[hashval] = wordID; 1370 1371 return 0; 1372 } 1373 #endif 1374 1375 int DB_InitReadWords_Native(void *db) 1376 { 1377 return 0; 1378 } 1379 1380 int DB_EndReadWords_Native(void *db) 1381 { 1382 return 0; 1383 } 1384 1385 #ifndef USE_BTREE 1386 int DB_ReadWordHash_Native(char *word, sw_off_t *wordID, void *db) 1387 { 1388 int wordlen, 1389 res, 1390 hashval; 1391 sw_off_t offset, dataoffset; 1392 char *fileword = NULL; 1393 struct Handle_DBNative *SW_DB = (struct Handle_DBNative *) db; 1394 FILE *fp = SW_DB->fp; 1395 1396 1397 /* If there is not a star use the hash approach ... */ 1398 res = 1; 1399 1400 /* Get hash file offset */ 1401 hashval = verybighash(word); 1402 if (!(offset = SW_DB->hashoffsets[hashval])) 1403 { 1404 *wordID = (sw_off_t)0; 1405 return 0; 1406 } 1407 /* Search for word */ 1408 while (res) 1409 { 1410 /* Position in file */ 1411 sw_fseek(fp, offset, SEEK_SET); 1412 /* Get word */ 1413 &nb
