| 1 |
|
|---|
| 2 |
package BuildIndex; |
|---|
| 3 |
use strict; |
|---|
| 4 |
use warnings; |
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
# Build a swish-e index by shelling out to the swish-e binary and return the
# statistics parsed from its output.
#
# Parameters:
#   $input         - path passed to swish-e via -i (single-quoted in the
#                    command line).
#   $index         - index file passed to swish-e via -f (single-quoted).
#   $config        - config file passed via -c; falls back to
#                    "conf/basic-libxml2.conf" when false.
#   $extra_options - text appended verbatim to the command line; falls back
#                    to the empty string when false.
#
# Returns whatever parse_indexing_output() returns for the captured output.
# Dies when the command yields no output at all.
#
# NOTE(review): every argument is interpolated into a shell command line.
# This looks intended for trusted test input only -- confirm before passing
# externally supplied values.
sub build_index_from_directory {
    my ($input, $index, $config, $extra_options) = @_;
    $config        = "conf/basic-libxml2.conf" unless $config;
    $extra_options = ""                        unless $extra_options;

    my $cmd = "swish-e -c $config -i '$input' -f '$index' -v 1 $extra_options";

    # Announce the command BEFORE running it, so the trace is visible even
    # when the command hangs or fails (same order as
    # build_index_from_external_program).
    print STDERR "$0: Running '$cmd'\n" if $ENV{TEST_VERBOSE};
    my $output = `$cmd`;

    die "$0: Didn't get any output from $cmd\n" unless $output;
    return parse_indexing_output( $output );
}
|---|
| 21 |
|
|---|
| 22 |
|
|---|
| 23 |
|
|---|
| 24 |
# Build a swish-e index from the output of an external program: the program's
# command line is piped into "swish-e -i stdin -S prog".
#
# Parameters:
#   $external_program - shell command whose standard output is piped to
#                       swish-e.
#   $index            - index file passed to swish-e via -f (single-quoted).
#   $config           - config file passed via -c; falls back to
#                       "conf/basic-libxml2.conf" when false.
#   $extra_options    - text appended verbatim to the command line; falls
#                       back to the empty string when false.
#
# Returns whatever parse_indexing_output() returns for the captured output.
# Dies when the pipeline yields no output at all.
sub build_index_from_external_program {
    my ($external_program, $index, $config, $extra_options) = @_;

    # Apply defaults for the optional arguments.
    $config        ||= "conf/basic-libxml2.conf";
    $extra_options ||= "";

    my $cmd = "$external_program | swish-e -c $config -i stdin -f '$index' -v 1 -S prog $extra_options";

    if ($ENV{TEST_VERBOSE}) {
        print STDERR "$0: Running '$cmd'\n";
    }

    # Run the pipeline through the shell and capture its standard output.
    my $output = qx{$cmd};
    if (!$output) {
        die "$0: Didn't get any output from $cmd\n";
    }

    return parse_indexing_output($output);
}
|---|
| 36 |
|
|---|
| 37 |
|
|---|
| 38 |
|
|---|
| 39 |
# Extract the summary counters from the text swish-e prints while indexing.
#
# Parameters:
#   $output - captured swish-e output; lines may end in \r, \n or both.
#             An undefined value is treated as empty output.
#
# Returns a hash (as a flat list) with exactly the keys
#   unique, properties, files, bytes, words
# each holding the digit string matched in the output, with commas removed.
# When a counter appears more than once, the last occurrence wins.
#
# Dies unless all five counters were found. The message lists the values
# that were found, the fields that are missing, and the raw output.
#
# With TEST_VERBOSE greater than 1 every line is traced to STDERR, the same
# stream the index-building subs use for their own trace.
sub parse_indexing_output {
    my $output = shift;
    $output = '' unless defined $output;

    my @lines = split /\r|\n/, $output;

    # Fixed field order keeps the error message deterministic.
    my @fields = qw(unique properties files bytes words);
    my %pattern_for = (
        unique     => qr/^\s*([0-9]+)\s+unique\s+words?\s+indexed/,
        properties => qr/^\s*([0-9]+)\s+properties/,
        files      => qr/^\s*([0-9]+)\s+files?\s+indexed/,
        bytes      => qr/\s([0-9]+)\s+total\s+byte/,
        words      => qr/\s([0-9]+)\s+total\s+word/,
    );

    # TEST_VERBOSE may hold a non-numeric value (e.g. "yes"); compare it
    # numerically without raising an "isn't numeric" warning.
    my $trace = do {
        no warnings 'numeric';
        defined $ENV{TEST_VERBOSE} && $ENV{TEST_VERBOSE} > 1;
    };

    my %out;
    for my $raw (@lines) {
        # Work on a copy so the raw output stays intact for the error
        # message; commas are thousands separators that hinder parsing.
        (my $line = $raw) =~ tr/,//d;

        print STDERR "PROCESSING: $line\n" if $trace;

        for my $field (@fields) {
            $out{$field} = $1 if $line =~ $pattern_for{$field};
        }
    }

    my @missing = grep { !exists $out{$_} } @fields;
    if (@missing) {
        my @found = grep { exists $out{$_} } @fields;
        die "Couldn't get data from swish-e index build, got "
          . join(", ", map { "$_ = {$out{$_}}" } @found)
          . "; missing " . join(", ", @missing)
          . "\n(output was " . join("\n", @lines) . ")";
    }

    return %out;
}
|---|
| 62 |
|
|---|
| 63 |
1; |
|---|