Changeset 2075
- Timestamp:
- 03/10/08 11:17:10 (2 months ago)
- Files:
-
- Swishetest/trunk/make_collection (modified) (7 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Swishetest/trunk/make_collection
r2056  r2075
   15     15
   16     16  # Dict file with words. One word per line.
   17         my $dict='data/C020-words-txt/words-linux-fc1.txt';
          17  my $dict='data/C020-words-txt/words-linux-fc1.txt'; # 45,000 word dictionary (408K)
   18     18
   19     19  my $min_words_per_file=100;
    …      …
   28     28  my $verbose = 0;
   29     29  my $progress = 1;
          30  my $progress_seconds = 60;
   30     31  my $lastprogresstime = 0;
   31     32
    …      …
   75     76  my $wordcounter = 0;
   76     77  print "Creating files...\n" if $verbose;
   77         for(my $i = 0; $i < $num_files; $i++)
   78         {
   79         if ($progress && time() - $lastprogresstime >= 10) {
          78  for(my $i = 0; $i < $num_files; $i++) {
          79  if ($i && $progress && time() - $lastprogresstime >= $progress_seconds) {
   80     80  my $percent = sprintf("%1.1f", $i / $num_files * 100);
   81         print STDERR "$prog: $filetype: " . scalar(localtime(time())) . "on file $i of $num_files ($percent%)\n";
          81  print STDERR "$prog: $filetype: on file $i of $num_files ($percent%)\n";
   82     82  $lastprogresstime = time();
   83     83  }
    …      …
  114    114  if ($base_dir) {
  115    115  my $path = "$base_dir/$i.$filetype";
  116         open(OUTFILE, ">" . " $path") || die "$prog: Couldn't open $path";
         116  open(OUTFILE, ">", $path) || die "$prog: Couldn't open $path";
  117    117  print OUTFILE $doc;
  118    118  close(OUTFILE) || die "$prog: Couldn't close $path";
    …      …
  127    127  # one block of text in xml
  128    128  sub simple_xmlify {
         129  # we should test with other encodings. This tests with ISO-8859-1
  129    130  return qq{<?xml version="1.0" encoding="ISO-8859-1"?>\n<swishdefault>\n} .
  130    131  $_[0] . "\n</swishdefault>\n\n";
    …      …
  133    134  # one block of text in txt
  134    135  sub simple_txtify {
  135         return "$_[0]\n";
         136  return $_[0] . "\n";
  136    137  }
  137    138
    …      …
  158    159
  159    160  sub simple_swishe_progify {
  160         #my ($parser, $path, $content, $lasttime) = @_;
         161  #my ($parser, $path, $content, $lasttime) = @_;
         162  # we dont use named here, based on the (probably misguided)
         163  #thinking that it may be faster.
  161    164  my $length = length($_[2]);
  162    165  my $header= <<EOF;
