Changeset 7
- Timestamp: 12/13/00 02:14:33 (8 years ago)
- Files:
- trunk/swish-e/INSTALL (added)
- trunk/swish-e/Makefile.in (modified) (6 diffs)
- trunk/swish-e/README (added)
- trunk/swish-e/README-PERL (deleted)
- trunk/swish-e/SWISH-PERL (added)
- trunk/swish-e/perl/SWISHE.xs (modified) (1 diff)
- trunk/swish-e/perl/test.pl (modified) (1 diff)
- trunk/swish-e/src/Makefile.in (modified) (5 diffs)
- trunk/swish-e/src/check.c (modified) (1 diff)
- trunk/swish-e/src/check.h (modified) (1 diff)
- trunk/swish-e/src/compress.c (modified) (5 diffs)
- trunk/swish-e/src/file.c (modified) (6 diffs)
- trunk/swish-e/src/fs.c (modified) (11 diffs)
- trunk/swish-e/src/fs.h (modified) (1 diff)
- trunk/swish-e/src/html.c (added)
- trunk/swish-e/src/html.h (added)
- trunk/swish-e/src/http.c (modified) (2 diffs)
- trunk/swish-e/src/index.c (modified) (27 diffs)
- trunk/swish-e/src/index.h (modified) (3 diffs)
- trunk/swish-e/src/mem.c (modified) (1 diff)
- trunk/swish-e/src/mem.h (modified) (1 diff)
- trunk/swish-e/src/merge.c (modified) (4 diffs)
- trunk/swish-e/src/merge.h (modified) (1 diff)
- trunk/swish-e/src/search.c (modified) (11 diffs)
- trunk/swish-e/src/string.c (modified) (3 diffs)
- trunk/swish-e/src/string.h (modified) (2 diffs)
- trunk/swish-e/src/swish.h (modified) (10 diffs)
- trunk/swish-e/src/swish2.c (modified) (1 diff)
- trunk/swish-e/src/txt.c (modified) (3 diffs)
- trunk/swish-e/src/txt.h (modified) (1 diff)
- trunk/swish-e/src/xml.c (modified) (3 diffs)
- trunk/swish-e/src/xml.h (modified) (1 diff)
- trunk/swish-e/tests/test.config (modified) (2 diffs)
- trunk/swish-e/tests/test.html (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/swish-e/Makefile.in
r5 r7 2 2 # 3 3 # Bas Meijer, bas@antraciet.com 11 Dec 2000 4 5 # $Id$ 4 6 5 7 SHELL = /bin/sh … … 7 9 VPATH = $(srcdir) 8 10 prefix = @prefix@ 11 exec_prefix = @exec_prefix@ 9 12 bindir = $(prefix)/bin 10 13 mandir = $(prefix)/man/man1 … … 16 19 swish-e: 17 20 echo 'making swish-e' 18 (cd src;./configure --quiet --prefix=$(prefix) ;make;cd ..)21 (cd src;./configure --quiet --prefix=$(prefix) --libdir=$(libdir) ;make;cd ..) 19 22 echo 'swish-e done' 20 23 swish-search: swish-e … … 25 28 # 26 29 clean: 27 rm -f src/swish-e src/swish-search src/ *.o \30 rm -f src/swish-e src/swish-search src/libswish-e.a src/*.o \ 28 31 src/index.swish src/Makefile Makefile 29 32 … … 32 35 rm -f src/config.cache src/config.log \ 33 36 src/config.status 34 rm -f src/swish-e src/swish-search src/*.o \ 35 src/index.swish src/Makefile Makefile 37 rm -f src/swish-e src/swish-search src/libswish-e.a src/*.o \ 38 src/index.swish src/Makefile Makefile \ 39 src/testlib 36 40 37 41 test: … … 49 53 ../src/swish-e -f ./test.index -w 'meta3="three little pigs"' | grep -v '^#'; ) 50 54 55 install: 56 cp src/swish-e $(bindir)/swish-e 57 58 install-lib: 59 cp src/libswish-e.a $(libdir)/libswish-e.a 60 trunk/swish-e/perl/SWISHE.xs
r2 r7 56 56 XPUSHs(sv_2mortal(newSVpv(result->filename,0))); 57 57 XPUSHs(sv_2mortal(newSVpv(result->title,0))); 58 XPUSHs(sv_2mortal(newSVpv(result->summary,0))); 58 59 XPUSHs(sv_2mortal(newSViv(result->start))); 59 60 XPUSHs(sv_2mortal(newSViv(result->size))); trunk/swish-e/perl/test.pl
r2 r7 1 #!/usr/local/bin/perl 1 #!/usr/local/bin/perl -w 2 2 3 use SWISHE;3 use strict; 4 4 5 #$properties='prop1 prop2 prop3'; 6 #$sortspec='prop1 asc prop2 desc'; 7 $searchstring='meta1=metatest1'; 5 =pod 6 Test script for the SWISHE library 7 please see perldoc README-PERL for more information 8 $Id$ 9 =cut 8 10 9 $indexfilename1='../tests/test.index'; 10 $indexfilename2='../tests/test.index'; 11 #$indexfilename2='another.index'; 12 $indexfilename="$indexfilename1"; 13 # To search for several index just put them together 14 $indexfilename="$indexfilename1 $indexfilename2"; 15 16 #Stemming sample 17 $word="parking"; 18 $stem_word=SwishStem($word); 19 print "$word is stemmed as $stem_word\n"; 11 # Import symbols 12 use SWISHE; 20 13 21 14 22 unless($handle=SWISHE::SwishOpen($indexfilename)) 23 { 24 print "Could not open index files\n"; 25 die; 15 # In this test we will use the same index twice 16 17 my $indexfilename1 = '../tests/test.index'; 18 my $indexfilename2 = $indexfilename1; 19 20 die "Index file '$indexfilename1' not found! 
Did you run make test?\n" 21 unless -e $indexfilename1; 22 23 my $indexfiles = $indexfilename1; 24 25 26 # To search for several indexes just put them together 27 #my $indexfiles = "$indexfilename1 $indexfilename2"; 28 29 30 # Open the index files 31 32 my $handle = SwishOpen( $indexfiles ) 33 or die "Failed to open '$indexfiles'"; 34 35 36 # Get a few headers from the index files 37 38 my @headers = qw/WordCharacters BeginCharacters EndCharacters/; 39 push @headers, 'Indexed on'; 40 41 for ( @headers ) { 42 print_header("Header '$_'"); 43 44 my @h = SwishHeaderParameter( $handle, $_ ); 45 print "$_ for index 0 is $h[0]\n"; 46 } 47 48 49 # Define a few searches 50 51 my @searches = ( 52 { 53 title => 'Normal search', 54 query => 'test', 55 props => '', 56 sort => '', 57 context => 1, # Search the entire file 58 }, 59 { 60 title => 'MetaTag search 1', 61 query => 'meta1=metatest1', 62 props => 'meta1 meta2 meta3', 63 sort => '', 64 context => 1, # Search the entire file 65 }, 66 { 67 title => 'MetaTag search 2', 68 query => 'meta2=metatest2', 69 props => 'meta1 meta2 meta3', 70 sort => '', 71 context => 1, # Search the entire file 72 }, 73 { 74 title => 'XML Search', 75 query => 'meta3=metatest3', 76 props => 'meta1 meta2 meta3', 77 sort => '', 78 context => 1, # Search the entire file 79 }, 80 { 81 title => 'Phrase Search', 82 query => 'meta3="three little pigs"', 83 props => 'meta1 meta2 meta3', 84 sort => '', 85 context => 1, # Search the entire file 86 }, 87 { 88 title => 'Advanced search', 89 query => 'test or meta1=m* or meta2=m* or meta3=m*', 90 props => 'meta1 meta2 meta3', 91 sort => '', 92 context => 1, # Search the entire file 93 }, 94 ); 95 96 my @settings = qw/query context props sort/; 97 98 print_header("*** Now searching ****"); 99 print "Note that some META names have embedded newlines.\n"; 100 101 102 for my $search ( @searches ) { 103 print_header( "$search->{title} - Query: '$search->{query}'" ); 104 105 my $num_results = SwishSearch( $handle, 
@{$search}{ @settings } ); 106 107 print "# Number of results = $num_results\n\n"; 108 109 unless ( $num_results ) { 110 print "No Results\n"; 111 112 my $error = SwishError( $handle ); 113 print "Error number: $error\n" if $error; 114 115 next; 116 } 117 118 while( my($rank,$index,$file,$title,$summary,$start,$size,@props) = SwishNext( $handle )) { 119 print join( ' ', 120 $rank, 121 $index, 122 $file, 123 qq["$title"], 124 qq["$summary"], 125 $start, 126 $size, 127 map{ qq["$_"] } @props, 128 ),"\n"; 129 } 130 } 131 132 print_header('Other Functions'); 133 134 135 136 my @stemwords = qw/parking libaries library librarians money monies running runs is/; 137 print "\nStemming:\n"; 138 print " '$_' => '" . SwishStem( $_ ) . "'\n" for @stemwords; 139 print "\n"; 140 141 my @stopwords = SwishStopWords( $handle, $indexfilename1 ); 142 print 'Stopwords: ', 143 ( @stopwords ? join(', ', @stopwords) : '** None **' ), 144 "\n\n"; 145 146 147 my $letter = 't'; 148 my @keywords = SwishWords( $handle, $indexfilename1, $letter); 149 150 print "List of keywords that start with the letter '$letter':\n", 151 join("\n", @keywords), 152 "\n\n"; 153 154 155 156 SwishClose( $handle ); 157 158 sub print_header { 159 print "\n", '-' x length( $_[0] ),"\n", 160 $_[0], 161 "\n", '-' x length( $_[0] ),"\n"; 26 162 } 27 163 28 # Need some info from header ? 
Here is how29 # Since we have open two files, two values are returned30 @wordchars=SWISHE::SwishHeaderParameter($handle,"WordCharacters");31 print "WordCharacters 0 = @wordchars[0]\n";32 print "WordCharacters 1 = @wordchars[1]\n";33 164 34 @beginchars=SWISHE::SwishHeaderParameter($handle,"BeginCharacters");35 print "BeginCharacters 0 = @beginchars[0]\n";36 print "BeginCharacters 1 = @beginchars[1]\n";37 38 @endchars=SWISHE::SwishHeaderParameter($handle,"EndCharacters");39 print "EndCharacters 0 = @endchars[0]\n";40 print "EndCharacters 1 = @endchars[1]\n";41 42 @ignorefirstchar=SWISHE::SwishHeaderParameter($handle,"IgnoreFirstChar");43 print "IgnoreFirstChar 0 = @ignorefirstchar[0]\n";44 print "IgnoreFirstChar 1 = @ignorefirstchar[1]\n";45 46 @ignorelastchar=SWISHE::SwishHeaderParameter($handle,"IgnoreLastChar");47 print "IgnoreLastChar 0 = @ignorelastchar[0]\n";48 print "IgnoreLastChar 1 = @ignorelastchar[1]\n";49 50 @indexedon=SWISHE::SwishHeaderParameter($handle,"Indexed on");51 print "Indexed on 0 = @indexedon[0]\n";52 print "Indexed on 1 = @indexedon[1]\n";53 54 @description=SWISHE::SwishHeaderParameter($handle,"Description");55 print "Description 0 = @description[0]\n";56 print "Description 1 = @description[1]\n";57 58 @indexpointer=SWISHE::SwishHeaderParameter($handle,"IndexPointer");59 print "IndexPointer 0 = @indexpointer[0]\n";60 print "IndexPointer 1 = @indexpointer[1]\n";61 62 @indexadmin=SWISHE::SwishHeaderParameter($handle,"IndexAdmin");63 print "IndexAdmin 0 = @indexadmin[0]\n";64 print "IndexAdmin 1 = @indexadmin[1]\n";65 66 @stemming=SWISHE::SwishHeaderParameter($handle,"Stemming");67 print "Stemming 0 = @stemming[0]\n";68 print "Stemming 1 = @stemming[1]\n";69 70 @soundex=SWISHE::SwishHeaderParameter($handle,"Soundex");71 print "Soundex 0 = @soundex[0]\n";72 print "Soundex 1 = @soundex[1]\n";73 74 # Do you want know the stopwords? 
Here is how75 @stopwords=SWISHE::SwishStopWords($handle,$indexfilename1);76 print "StopWords =";77 for($i=0;@stopwords[$i];$i++)78 {79 print " @stopwords[$i]"80 }81 print "\n";82 83 # Do you want know the indexeded words starting with 't'? Here is how84 @keywords=SWISHE::SwishWords($handle,$indexfilename1,"t");85 print "KeyWords =";86 for($i=0;@keywords[$i];$i++)87 {88 print " @keywords[$i]"89 }90 print "\n";91 92 $structure=1;93 94 # Uncomment for an endless loop95 #while (<>)96 #{97 $num_results=SwishSearch($handle,$searchstring,$structure,$properties,$sortspec);98 99 if ($num_results<0)100 {101 print "Search error: $num_results\n";102 } else{103 print "Search Results: $num_results\n";104 }105 106 while(($rank,$indexfile,$filename,$title,$start,$size,$prop1,$prop2,$prop3)=SWISHE::SwishNext($handle))107 {108 print "$rank $indexfile $filename \"$title\" $start $size \"$prop1\" \"$prop2\" \"$prop3\"\n";109 }110 # Uncomment for an endless loop111 #}112 113 SWISHE::SwishClose($handle);trunk/swish-e/src/Makefile.in
r6 r7 10 10 # Several ideas from a Makefile by Christian Lindig <lindig@ips.cs.tu-bs.de> 11 11 # 12 # $Id$ 13 12 14 NAME = swish-e 13 15 # C compiler … … 17 19 prefix = @prefix@ 18 20 bindir = $(prefix)/bin 19 libdir = $(prefix)/lib 21 exec_prefix = @exec_prefix@ 22 libdir = @libdir@ 20 23 mandir = $(prefix)/man 21 24 man1dir = $(mandir)/man1 … … 41 44 OBJS= check.o file.o index.o search.o error.o methods.o\ 42 45 hash.o list.o mem.o string.o merge.o swish2.o stemmer.o \ 43 soundex.o docprop.o compress.o deflate.o xml.o txt.o \46 soundex.o docprop.o compress.o deflate.o xml.o txt.o html.o\ 44 47 $(FILESYSTEM_OBJS) $(HTTP_OBJS) 45 48 … … 90 93 $(OBJS): Makefile config.h swish.h 91 94 92 install: 95 install: 96 cp swish-e $(bindir)/swish-e 97 98 install-lib: 93 99 cp libswish-e.a $(libdir)/libswish-e.a 94 100 … … 110 116 list.h 111 117 hash.o: hash.c swish.h config.h hash.h mem.h string.h 118 html.o: html.c swish.h mem.h html.h 112 119 http.o: http.c swish.h config.h index.h hash.h string.h mem.h file.h \ 113 http.h httpserver.h 120 http.h httpserver.h html.h 114 121 httpserver.o: httpserver.c swish.h config.h string.h mem.h http.h \ 115 122 httpserver.h trunk/swish-e/src/check.c
r2 r7 216 216 return NODOCTYPE; 217 217 } 218 219 220 struct StoreDescription *hasdescription(int doctype, struct StoreDescription *sd) 221 { 222 while(sd) 223 { 224 if(sd->DocType==doctype) 225 return sd; 226 sd=sd->next; 227 } 228 return NULL; 229 } trunk/swish-e/src/check.h
r2 r7 27 27 char *hasfilter _AP((char *filename, struct filter *filterlist)); 28 28 int getdoctype _AP((char *filename, struct IndexContents *indexcontents)); 29 struct StoreDescription *hasdescription _AP ((int, struct StoreDescription *)); trunk/swish-e/src/compress.c
r2 r7 211 211 } 212 212 213 buffer=buildFileEntry(filep->fi.filename, filep->fi.title, filep->fi.s tart, filep->fi.size, sw->fp_file_write, &filep->docProperties, filep->fi.lookup_path,&sz_buffer);213 buffer=buildFileEntry(filep->fi.filename, filep->fi.title, filep->fi.summary, filep->fi.start, filep->fi.size, sw->fp_file_write, &filep->docProperties, filep->fi.lookup_path,&sz_buffer); 214 214 tmp=sz_buffer+1; 215 215 compress1(tmp,sw->fp_file_write); /* Write len */ … … 228 228 { 229 229 struct file *fi; 230 int len,len1,len2, begin,bytes,lookup_path;230 int len,len1,len2,len3,begin,bytes,lookup_path; 231 231 char *buffer,*p; 232 char *buf1,*buf2 ;232 char *buf1,*buf2,*buf3; 233 233 fi=(struct file *)emalloc(sizeof(struct file)); 234 234 if (!sw->fp_file_read) … … 253 253 uncompress3(len2,p); /* Read length of title */ 254 254 if(!len2) /* filename == title */ 255 buf2= estrdup(buf1);255 buf2=buf1; 256 256 else { 257 257 buf2 = emalloc(len2); … … 259 259 p+=len2; 260 260 } 261 uncompress3(len3,p); /* Read length of summary */ 262 if(!len3) /* No summary */ 263 buf3=NULL; 264 else { 265 buf3 = emalloc(len3); 266 memcpy(buf3,p,len3); /* Read summary */ 267 p+=len3; 268 } 261 269 uncompress3(begin,p); /* Read start */ 262 270 begin--; … … 267 275 fi->fi.filename = buf1; 268 276 fi->fi.title = buf2; 277 fi->fi.summary = buf3; 269 278 fi->fi.start = begin; 270 279 fi->fi.size = bytes; trunk/swish-e/src/file.c
r2 r7 317 317 int DocType=0; 318 318 struct IndexContents *ic; 319 struct StoreDescription *sd; 319 320 IndexFILE *indexf=NULL; 320 321 … … 527 528 sw->indexcontents=ic; 528 529 } else progerr("IndexContents requires at least two values"); 530 } 531 else if ((c = (char *)lstrstr(line,"StoreDescription")) !=0) { 532 c += strlen("StoreDescription"); 533 sl=parse_line(c); 534 if(sl && (sl->n==2 || sl->n==3)) { 535 if(strcasecmp(sl->word[0],"TXT")==0) { 536 DocType=TXT; 537 } 538 else if(strcasecmp(sl->word[0],"HTML")==0) { 539 DocType=HTML; 540 } 541 else if(strcasecmp(sl->word[0],"XML")==0) { 542 DocType=XML; 543 } 544 else if(strcasecmp(sl->word[0],"MULTITXT")==0) { 545 DocType=MULTITXT; 546 } 547 else if(strcasecmp(sl->word[0],"WML")==0) { 548 DocType=WML; 549 } else progerr("Unknown document type in StoreDescription"); 550 sd=(struct StoreDescription *)emalloc(sizeof(struct StoreDescription)); 551 sd->DocType=DocType; 552 sd->size=0; 553 sd->field=NULL; 554 i=1; 555 556 if(sl->word[i][0]=='<' && sl->word[i][strlen(sl->word[i])-1]=='>') 557 { 558 sl->word[i][strlen(sl->word[i])-1]='\0'; 559 sd->field=estrdup(sl->word[i]+1); 560 i++; 561 } 562 if(i<sl->n && isnumstring(sl->word[i])) 563 { 564 sd->size=atoi(sl->word[i]); 565 } 566 if(sl->n==2 && !sd->field && !sd->size) 567 progerr("Second parameter of StoreDescription must be <fieldname> or a number"); 568 if(sl->n==3 && sd->field && !sd->size) 569 progerr("Third parameter of StoreDescription must be empty or a number"); 570 if(sw->storedescription) 571 sd->next=sw->storedescription; 572 else 573 sd->next=NULL; 574 sw->storedescription=sd; 575 } else progerr("StoreDescription requires two or three values"); 529 576 } 530 577 else if ((c = (char *)lstrstr(line,"DefaultContents")) !=0) { … … 858 905 -- return: (FileProp *) 859 906 -- A failed stat returns an empty (default) structure 907 908 -- 2000-12 909 -- Added StoreDescription 860 910 */ 861 911 … … 878 928 fprop->index_no_content = 0; /* former: was 
indextitleonly! */ 879 929 fprop->filterprog = NULL; /* Default = No Filter */ 930 fprop->stordesc = NULL; /* Default = No summary */ 880 931 881 932 fprop->real_path = real_path; 882 933 fprop->work_path = (work_file) ? work_file : real_path; 883 /* -- also possible: a save copy of pathes using estrdup(),884 -- but IMO not necessarry [to discuss...] $$885 */886 887 934 888 935 /* -- Get Properties of File … … 893 940 fprop->mtime = stbuf.st_mtime; 894 941 895 896 897 942 /* -- get Doc Type as is in IndexContents or Defaultcontents 898 943 -- doctypes by jruiz … … 917 962 fprop->filterprog = hasfilter (fprop->real_path,sw->filterlist); 918 963 964 fprop->stordesc = hasdescription (fprop->doctype,sw->storedescription); 919 965 920 966 trunk/swish-e/src/fs.c
r2 r7 103 103 int lens; 104 104 char *s,*p; 105 int lentitle;106 char *title,*tmptitle;107 105 DOCENTRYARRAY *sortfilelist, *sortdirlist; 108 106 struct swline *tmplist; … … 139 137 140 138 s=(char *)emalloc((lens=MAXFILELEN) + 1); 141 title=(char *)emalloc((lentitle=MAXTITLELEN) + 1);142 139 143 140 dfd = opendir(dir); … … 205 202 continue; 206 203 207 /* $$$--- should the following be better in "html.c or whatever" (countwords_html) routine?208 */209 if (ishtml(sw,s)) {210 title=SafeStrCopy(title, (char *) (tmptitle=parsetitle(s, s)),&lentitle);211 efree(tmptitle);212 if (!isoktitle(sw,title))213 continue;214 }215 else {216 if ((p=strrchr(s, '/')))217 { title=SafeStrCopy(title,p + 1,&lentitle); }218 else219 { title=SafeStrCopy(title, s,&lentitle); }220 }221 204 sortfilelist = (DOCENTRYARRAY *) 222 addsortentry(sortfilelist, s , title);205 addsortentry(sortfilelist, s); 223 206 } 224 207 else { 225 208 sortdirlist = (DOCENTRYARRAY *) 226 addsortentry(sortdirlist, s, s); 227 } 228 } 229 230 efree(title); 209 addsortentry(sortdirlist, s); 210 } 211 } 212 231 213 efree(s); 232 214 … … 244 226 int badfile; 245 227 char *t; 246 int lentitle; 247 char *title,*tmptitle; 248 DOCENTRY *fileentry; 228 char *filename; 249 229 struct swline *tmplist; 250 230 … … 298 278 299 279 300 /* $$$--- should the following be better in html.c (countwords_html) routine? 
301 */ 302 303 title=(char *) emalloc((lentitle=MAXSTRLEN)+1); 304 if (ishtml(sw,path)) { 305 title = SafeStrCopy(title, (char *) (tmptitle=parsetitle(path, path)),&lentitle); 306 efree(tmptitle); 307 if (!isoktitle(sw,title)) { 308 efree(title); 309 return; 310 } 311 } 312 else { 313 if ((t = strrchr(path, '/')) != NULL) 314 { title=SafeStrCopy(title, t + 1,&lentitle); } 315 else 316 { title=SafeStrCopy(title, path, &lentitle); } 317 } 318 319 fileentry = (DOCENTRY *) emalloc(sizeof(DOCENTRY)); 320 fileentry->filename = (char *) estrdup(path); 321 /* Dup title to not to waste memory */ 322 fileentry->title = (char *) estrdup(title); 323 324 efree(title); 325 326 printfile(sw,fileentry); 280 filename = (char *) estrdup(path); 281 282 printfile(sw,filename); 327 283 } 328 284 … … 331 287 */ 332 288 333 void printfile(SWISH *sw, DOCENTRY *e)289 void printfile(SWISH *sw, char *filename) 334 290 { 335 291 char *s; … … 337 293 338 294 339 if ( e != NULL) {295 if (filename) { 340 296 if (sw->verbose == 3) { 341 if ((s = (char *) strrchr( e->filename, '/')) == NULL)342 printf(" %s", e->filename);297 if ((s = (char *) strrchr(filename, '/')) == NULL) 298 printf(" %s", filename); 343 299 else 344 300 printf(" %s", s + 1); … … 347 303 348 304 349 fprop = file_properties ( e->filename, e->filename, sw);350 do_index_file(sw,fprop ,e->title);305 fprop = file_properties (filename, filename, sw); 306 do_index_file(sw,fprop); 351 307 352 308 353 309 free_file_properties (fprop); 354 efree(e->filename); 355 efree(e->title); 356 efree(e); 310 efree(filename); 357 311 } 358 312 } … … 367 321 if(e) { 368 322 for(i=0;i<e->currentsize;i++) 369 printfile(sw, e-> dlist[i]);370 /* free the array and dlist*/371 efree(e-> dlist);323 printfile(sw, e->filenames[i]); 324 /* free the array and filenames */ 325 efree(e->filenames); 372 326 efree(e); 373 327 } … … 385 339 for(i=0;i<e->currentsize;i++) { 386 340 if (sw->verbose == 3) 387 printf("\nIn dir \"%s\":\n", e-> dlist[i]->filename);341 
printf("\nIn dir \"%s\":\n", e->filenames[i]); 388 342 else if (sw->verbose == 2) 389 printf("Checking dir \"%s\"...\n",e->dlist[i]->filename); 390 indexadir(sw,e->dlist[i]->filename); 391 efree(e->dlist[i]->filename); 392 efree(e->dlist[i]->title); 393 efree(e->dlist[i]); 394 } 395 efree(e->dlist); 343 printf("Checking dir \"%s\"...\n",e->filenames[i]); 344 indexadir(sw,e->filenames[i]); 345 efree(e->filenames[i]); 346 } 347 efree(e->filenames); 396 348 efree(e); 397 349 } … … 399 351 400 352 401 402 /* This checks is a filename has one of the following suffixes:403 ** "htm", "HTM", "html", "HTML", "shtml", "SHTML".404 */405 /* 09/00 Jose Ruiz406 ** Modified to handle IndexContents and DefaultContents directives */407 int ishtml(sw,filename)408 SWISH *sw;409 char *filename;410 {411 char *c,*suffix;412 int DocType;413 if(!filename) return 0;414 415 c = (char *) strrchr(filename, '.');416 417 if (!c || c[1]=='\0') return 0;418 419 suffix=c+1;420 /* 09/00 Jose Ruiz */421 /* get DocType based in IndexContents or Defaultcontents */422 if((DocType=getdoctype(filename,sw->indexcontents))==NODOCTYPE && sw->DefaultDocType!=NODOCTYPE)423 DocType=sw->DefaultDocType;424 if(DocType==HTML) return 1;425 else if(DocType==NODOCTYPE) {426 /* IndexContents and DefaultContents not specified */427 /* So, use the old method for compatibility reasons */428 if (!strncmp(suffix, "htm", 3)) return 1;429 else if (!strncmp(suffix, "HTM", 3)) return 1;430 else if (!strncmp(suffix, "shtml", 5)) return 1;431 else if (!strncmp(suffix, "SHTML", 5)) return 1;432 }433 return 0;434 }435 436 /* Check if a particular title (read: file!) 
should be ignored437 ** according to the settings in the configuration file.438 */439 440 int isoktitle(sw,title)441 SWISH *sw;442 char *title;443 {444 int badfile;445 struct swline *tmplist;446 447 badfile = 0;448 tmplist = sw->titconlist;449 while (tmplist != NULL) {450 if (matchARegex(title, tmplist->line)) {451 badfile = 1;452 break;453 }454 tmplist = tmplist->next;455 }456 if (badfile)457 return 0;458 else459 return 1;460 }461 353 462 354 /********************************************************/ trunk/swish-e/src/fs.h
r2 r7 5 5 void indexadir(SWISH *, char *); 6 6 void indexafile(SWISH *, char *); 7 void printfile(SWISH *, DOCENTRY*);7 void printfile(SWISH *, char *); 8 8 void printfiles(SWISH *, DOCENTRYARRAY *); 9 9 void printdirs(SWISH *, DOCENTRYARRAY *); 10 int ishtml(SWISH *,char *);11 int isoktitle(SWISH *,char *);12 10 trunk/swish-e/src/http.c
r2 r7 56 56 #include "xml.h" 57 57 #include "txt.h" 58 #include "html.h" 58 59 59 60 typedef struct urldepth { … … 453 454 454 455 455 do_index_file(sw,fprop ,fprop->real_path);456 do_index_file(sw,fprop); 456 457 457 458 trunk/swish-e/src/index.c
r2 r7 91 91 ** $$ swish-e was originally designed to index html only. So the routines 92 92 ** $$ are for historically reasons scattered 93 ** $$ (e.g. isoktitle (), is okhtml() etc.) 93 ** $$ (e.g. isoktitle (), is ishtml() etc.) 94 ** 95 ** 2000-12 Jose Ruiz 96 ** obsolete routine ishtml removed 97 ** isoktitle moved to html.c 94 98 ** 95 99 */ … … 115 119 116 120 117 118 119 120 121 122 121 /* 123 122 -- Start the real indexing process for a file. … … 131 130 */ 132 131 133 void do_index_file (SWISH *sw, FileProp *fprop , char *title)132 void do_index_file (SWISH *sw, FileProp *fprop) 134 133 { 135 134 int wordcount; … … 137 136 char *rd_buffer=NULL; /* complete file read into buffer */ 138 137 139 140 /* $$ title is obsolete and has to be removed form all parent subroutines!141 $$ title has to retrieved from the countwordsroutines if necessary142 $$ also DocEntry is not needed in parent subroutines.... (IMO) - rasc 2000-11143 $$ to be discussed... (Jose should we remove this?)144 */145 printf ("obsolete title (same as fprop path information?): %s\n",title);146 147 138 wordcount = -1; 148 139 filtercmd = NULL; 149 150 140 151 141 if (fprop->work_path) { … … 186 176 case TXT: 187 177 if(sw->verbose == 3) printf(" - Using TXT filter - "); 188 wordcount = countwords_TXT(sw, fprop, title,rd_buffer);178 wordcount = countwords_TXT(sw, fprop, rd_buffer); 189 179 break; 190 180 191 181 case HTML: 192 182 if(sw->verbose == 3) printf(" - Using HTML filter - "); 193 wordcount = countwords (sw, fprop, title, rd_buffer);183 wordcount = countwords_HTML(sw, fprop, rd_buffer); 194 184 break; 195 185 196 186 case XML: 197 187 if(sw->verbose == 3) printf(" - Using XML filter - "); 198 wordcount = countwords_XML(sw, fprop, title,rd_buffer);188 wordcount = countwords_XML(sw, fprop, rd_buffer); 199 189 break; 200 190 201 191 case MULTITXT: 202 192 if(sw->verbose == 3) printf(" - Using MULTITXT filter - "); 203 wordcount = countwords (sw, fprop, title, rd_buffer);193 wordcount = 
countwords_HTML(sw, fprop, rd_buffer); 204 194 break; 205 195 206 196 case WML: 207 197 if(sw->verbose == 3) printf(" - Using WML filter - "); 208 wordcount = countwords (sw, fprop, title, rd_buffer);198 wordcount = countwords_HTML(sw, fprop, rd_buffer); 209 199 break; 210 200 211 201 default: 212 202 if(sw->verbose == 3) printf(" - Using DEFAULT filter - "); 213 wordcount = countwords (sw, fprop, title, rd_buffer);203 wordcount = countwords_HTML(sw, fprop, rd_buffer); 214 204 break; 215 205 } … … 230 220 else if (wordcount == 0) 231 221 printf(" (no words)\n"); 232 else 222 else if (wordcount == -1) 233 223 printf(" (not opened)\n"); 224 else if (wordcount == -2) 225 printf(" (title is not ok)\n"); 234 226 fflush(stdout); 235 227 } … … 247 239 */ 248 240 249 DOCENTRYARRAY *addsortentry(e, filename , title)241 DOCENTRYARRAY *addsorten
