|
Revision 1913, 1.4 kB
(checked in by karpet, 1 year ago)
|
for all the world to see
|
| Line | |
|---|
| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
use strict;
use warnings;
use Text::FormatTable;

# Print a table of the most frequent words found in a Swish-e index.
#
# Usage: <script> [num [index]]
#   num   - how many words to list (defaults to 50)
#   index - path to the index file (defaults to 'index.swish-e')
#
# The script runs `swish-e -f <index> -T INDEX_WORDS`, accumulates two
# numbers per word from that output (a running total and a number of
# instances), and prints the top <num> words ordered by the total.
#
# Dies if the index file is missing/empty, if <num> is not a positive
# integer, or if swish-e cannot be started.

my ($num, $index) = @ARGV;

$index ||= 'index.swish-e';
$num   ||= 50;

unless (-s $index) {
    die "no such index: $index\n";
}

# $num can never be false here (it was just defaulted), so a plain truth
# test would be dead code. Check that it is really a usable row count.
unless ($num =~ /\A[0-9]+\z/ && $num > 0) {
    die "need a number of words to output\n";
}

# word => [ running total, number of instances ]
my %count;

# Keep the command as a list so that $index reaches swish-e as a single
# argument and is never interpreted by a shell.
my @cmd = ('swish-e', '-f', $index, '-T', 'INDEX_WORDS');

warn "@cmd\n";

open(my $swish, '-|', @cmd)
    or die "can't exec '@cmd': $!\n";

while (my $line = <$swish>) {
    chomp $line;

    # Everything before the first "[<digits> " is taken as the word; each
    # following chunk is one instance record.
    my ($word, @insts) = split /\[\d+ /, $line;

    INST: for my $inst (@insts) {
        next INST if !$inst;
        my ($doc, $cnt) = split(/\s+/, $inst);

        # NOTE(review): the exact field layout of INDEX_WORDS output is not
        # visible from this script, so the original lenient numeric coercion
        # of the second field is kept instead of warning per record.
        no warnings qw(numeric uninitialized);
        $count{$word}->[0] += $cnt;
        $count{$word}->[1]++;
    }
}

# For a piped open, close() is false when the child exited non-zero; in
# that case $! is 0 and the exit status is in $?. Report it but still
# print whatever was collected.
close($swish)
    or warn $!
        ? "error closing pipe from '@cmd': $!\n"
        : "'@cmd' exited with status " . ($? >> 8) . "\n";

my $tbl = Text::FormatTable->new('r r l l');
$tbl->head('#', 'word', 'count', 'unique docs');
$tbl->rule('=');

# Highest total first; break ties alphabetically so the output is stable
# from run to run.
my @ranked = sort {
    $count{$b}->[0] <=> $count{$a}->[0]
        or $a cmp $b
} keys %count;

my $rank = 0;
for my $word (@ranked) {
    my ($cnt, $docs) = @{ $count{$word} };
    $tbl->row(++$rank, $word, $cnt, $docs);
    last if $rank >= $num;
}

print $tbl->render(60);
|---|
| 71 |
|
|---|