Show
Ignore:
Timestamp:
11/13/07 23:31:51 (1 year ago)
Author:
karpet
Message:

doc tweak; some config work

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • libswish3/trunk/perl/docmaker.pl

    r1948 r1955  
    55use SWISH::Prog::Headers; 
    66use Search::Tools::XML; 
     7use Term::ProgressBar; 
    78 
    89my $usage = "$0 [max_files] [utf_factor]\n"; 
     
    2627my $utf_factor = shift @ARGV; 
    2728$utf_factor = 10 
    28   unless 
    29   defined $utf_factor;    # every Nth word gets converted to random UTF string 
     29    unless 
     30    defined $utf_factor;  # every Nth word gets converted to random UTF string 
    3031 
    31 my $counter = 0; 
    32  
    33 my ($num_words, @words, $i, $j); 
     32my ( $num_words, @words ); 
    3433 
    3534binmode STDOUT, ":utf8"; 
     
    3837open DICT, "<$dict" or die "can't open $dict: $!\n"; 
    3938 
    40 for ($num_words = 0 ; $words[$num_words] = <DICT> ; $num_words++) 
    41 
     39for ( $num_words = 0; $words[$num_words] = <DICT>; $num_words++ ) { 
    4240    chomp $words[$num_words]; 
    4341 
    4442    # utf hack: convert every Nth word up a factor of $num_words > 1 
    45     if ($utf_factor > 0 && !$num_words % $utf_factor) 
    46     { 
     43    if ( $utf_factor > 0 && !$num_words % $utf_factor ) { 
    4744        no bytes;    # so ord() and chr() work as expected 
    4845                     #warn ">> $num_words: $words[$num_words]\n"; 
    4946        my $utf_word = ''; 
    50         for my $c (split(//, $words[$num_words])) 
    51         { 
    52             my $u = 
    53               chr(ord($c) + 30000); # 30000 puts it in Chinese range, I think... 
     47        for my $c ( split( //, $words[$num_words] ) ) { 
     48            my $u = chr( ord($c) + 30000 ) 
     49                ;    # 30000 puts it in Chinese range, I think... 
    5450            $utf_word .= $u; 
    5551        } 
     
    6460srand; 
    6561 
    66 for ($i = 0 ; $i < $max_files ; $i++) 
    67 
    68     my $this_file_words = 
    69       int(rand($max_words_per_file - $min_words_per_file + 1)) + 
    70       $min_words_per_file; 
    71     my $doc = ""; 
    72     for ($j = 0 ; $j < $this_file_words ; $j++) 
    73     { 
    74         $doc .= $words[int(rand($num_words - 1))] . " "; 
     62my $i = 0; 
     63my $progress 
     64    = Term::ProgressBar->new( { term_width => 80, count => $max_files } ); 
     65 
     66# preallocate memory (doesn't really matter after all...) 
     67my $doc = ' ' x ( $max_words_per_file * 10 ); 
     68my $xml = $doc; 
     69while ( $i++ < $max_files ) { 
     70    my $this_file_words 
     71        = int( rand( $max_words_per_file - $min_words_per_file + 1 ) ) 
     72        + $min_words_per_file; 
     73    $doc = ''; 
     74    my $word_cnt = 0; 
     75    while ( $word_cnt++ < $this_file_words ) { 
     76        $doc .= $words[ int( rand( $num_words - 1 ) ) ] . ' '; 
    7577    } 
    7678    Search::Tools::XML->escape($doc); 
    77     $doc = <<EOF 
    78 <?xml version="1.0" encoding="utf-8"?> 
     79    $xml = qq(<?xml version="1.0" encoding="utf-8"?> 
    7980<doc> 
    8081$doc 
    8182</doc> 
    82 EOF 
    83       ; 
     83); 
    8484 
    85     #print SWISH::Prog::Headers->head( $doc, { url=>$counter++, mtime=>time(), mime=>'text/xml' } ) . $doc; 
    86     print SWISH::Prog::Headers->head( 
    87                                      $doc, 
    88                                      { 
    89                                       url   => $counter++, 
    90                                       mtime => time(), 
    91                                       mime  => 'text/xml' 
    92                                      } 
    93                                     ) 
    94       . $doc; 
     85    my $header = SWISH::Prog::Headers->head( 
     86        $xml, 
     87        {   url   => $i, 
     88            mtime => time(), 
     89            mime  => 'text/xml' 
     90        } 
     91    ); 
     92 
     93    print $header, $xml; 
     94 
     95    $progress->update($i); 
    9596 
    9697}