|
Revision 2113, 1.3 kB
(checked in by karpet, 3 months ago)
|
perl examples for xapian
|
| Line | |
|---|
| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
use strict;
use warnings;

use Search::Xapian ':all';
use Getopt::Long;
use Search::Tools;
use Search::Tools::UTF8;

# Command-line search against a Xapian index.
#
# Two modes:
#   --uri=URI     report the docid stored under the term 'U' . URI, or say
#                 that the URI is not in the index, then exit.
#   query words   lowercase and join the remaining arguments, run them as a
#                 query, and print up to 10 matches with docid, percent
#                 score and a snipped/highlighted excerpt of the doc data.
#
# --index selects the index path (default 'xapian_index').

my $usage = "$0 --index <name> [query | --uri=URI]\n";

my $index = 'xapian_index';
my $uri   = '';

# GetOptions returns false on unknown or malformed options; stop with the
# usage text instead of silently running with defaults.
GetOptions( 'index=s' => \$index, 'uri=s' => \$uri ) or die $usage;

binmode STDOUT, ':utf8';

# Test length rather than truth so that a URI of '0' is still honoured.
die $usage unless ( @ARGV or length $uri );

print "Searching $index\n";
my $db = Search::Xapian::Database->new($index);

# URI lookup mode: the URI is looked up as a term with a 'U' prefix.
if ( length $uri ) {
    my $uri_term = 'U' . $uri;
    if ( $db->term_exists($uri_term) ) {
        my $iter = $db->postlist_begin($uri_term);
        print "$uri -> " . $iter->get_docid, "\n";
    }
    else {
        print "$uri is not in index\n";
    }

    exit;
}

# Query mode.
my $query = join( ' ', map { lc($_) } @ARGV );

# NOTE(review): the joined string is handed to enquire() as a single
# argument; with several words this may be treated as one literal term
# rather than parsed into separate terms -- confirm against how the index
# was built, and consider Search::Xapian::QueryParser if not intended.
my $enq = $db->enquire($query);

# The snipper and hiliter share one regexp object built from the query.
my $regex   = Search::Tools->regexp( query => $query );
my $snipper = Search::Tools->snipper( query => $regex );
my $hiliter = Search::Tools->hiliter( query => $regex );

printf "Running query '%s'\n", $enq->get_query()->get_description();

# Ask for at most 10 matches starting at offset 0, so the count printed
# below is the number of matches fetched, capped at 10.
my @matches = $enq->matches( 0, 10 );
print scalar(@matches) . " results found\n";

foreach my $match (@matches) {
    my $doc = $match->get_document();

    # Convert the stored data with to_utf8(), then snip and highlight it.
    my $excerpt
        = $hiliter->light( $snipper->snip( to_utf8( $doc->get_data() ) ) );

    printf "ID %d %d%% [ %s ]\n",
        $match->get_docid(), $match->get_percent(), $excerpt;
}