Changeset 2075

Show
Ignore:
Timestamp:
03/10/08 11:17:10 (2 months ago)
Author:
joshr
Message:

Show progress every minute, not every 10 seconds.
Also added comments.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Swishetest/trunk/make_collection

    r2056 r2075  
    1515 
    1616# Dict file with words. One word per line. 
    17 my $dict='data/C020-words-txt/words-linux-fc1.txt'; 
     17my $dict='data/C020-words-txt/words-linux-fc1.txt';     # 45,000 word dictionary (408K) 
    1818 
    1919my $min_words_per_file=100; 
     
    2828my $verbose = 0; 
    2929my $progress = 1; 
     30my $progress_seconds = 60; 
    3031my $lastprogresstime = 0; 
    3132 
     
    7576    my $wordcounter = 0; 
    7677    print "Creating files...\n" if $verbose; 
    77     for(my $i = 0; $i < $num_files; $i++) 
    78     { 
    79         if ($progress && time() - $lastprogresstime >= 10) { 
     78    for(my $i = 0; $i < $num_files; $i++) { 
     79        if ($i && $progress && time() - $lastprogresstime >= $progress_seconds) { 
    8080            my $percent = sprintf("%1.1f", $i / $num_files * 100); 
    81             print STDERR "$prog: $filetype: " . scalar(localtime(time())) . " on file $i of $num_files ($percent%)\n"; 
     81            print STDERR "$prog: $filetype: on file $i of $num_files ($percent%)\n"; 
    8282            $lastprogresstime = time(); 
    8383        } 
     
    114114        if ($base_dir) { 
    115115            my $path = "$base_dir/$i.$filetype"; 
    116             open(OUTFILE, ">" . " $path") || die "$prog: Couldn't open $path"; 
     116            open(OUTFILE, ">", $path) || die "$prog: Couldn't open $path"; 
    117117            print OUTFILE $doc; 
    118118            close(OUTFILE) || die "$prog: Couldn't close $path"; 
     
    127127# one block of text in xml 
    128128sub simple_xmlify { 
     129    # we should test with other encodings. This tests with ISO-8859-1 
    129130    return qq{<?xml version="1.0" encoding="ISO-8859-1"?>\n<swishdefault>\n} .  
    130131        $_[0] . "\n</swishdefault>\n\n";  
     
    133134# one block of text in txt 
    134135sub simple_txtify { 
    135     return "$_[0]\n"; 
     136    return $_[0] . "\n"; 
    136137} 
    137138 
     
    158159 
    159160sub simple_swishe_progify { 
    160     #my ($parser, $path, $content, $lasttime) = @_; 
     161    #my ($parser, $path, $content, $lasttime) = @_;  
     162    # we don't use named variables here, based on the (probably misguided)  
     163    # thinking that it may be faster. 
    161164    my $length = length($_[2]); 
    162165    my $header= <<EOF;