|
Revision 2113, 1.8 kB
(checked in by karpet, 3 months ago)
|
perl examples for xapian
|
| Line | |
|---|
| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
use strict;
use warnings;
use Search::Xapian ':all';
use IndexerUtils;
use Getopt::Long;
use Search::Tools::UTF8;

# Example indexer: adds each file named on the command line to a Xapian
# database as one document.
#
# Options:
#   --index <name>   path of the Xapian database (default 'xapian_index')
#   --verbose        print a message for files that are skipped; the flag
#                    is also passed through to IndexerUtils::normalize()
#
# Each document gets:
#   * the terms produced by running the normalized text through a
#     Search::Xapian::TermGenerator,
#   * a "U<file>" term, used below to detect a file that was already added,
#   * document data of the form "<file>: <text>",
#   * value slot 5 (a timestamp) and value slot 0 (the file name).

my $usage = "$0 --index <name> files_to_index\n";

my $index   = 'xapian_index';
my $verbose = 0;

# GetOptions() returns false (after warning on STDERR) when it meets an
# unknown or malformed option; stop instead of indexing with settings the
# user did not ask for.
GetOptions( 'index=s' => \$index, 'verbose' => \$verbose )
    or die $usage;

die $usage unless @ARGV;

# Value slot numbers used with add_value() below.
my $SWISH_PROP_MTIME_ID = 5;
my $SWISH_PROP_DOCID_ID = 0;

# NOTE(review): DB_CREATE_OR_OVERWRITE starts from an empty database on every
# run, so the term_exists() check in the loop can only catch a file that
# appears more than once within this run -- confirm that is intended.
my $db
    = Search::Xapian::WritableDatabase->new( $index, DB_CREATE_OR_OVERWRITE )
    or die "can't create write-able db object: $!\n";

for my $file ( IndexerUtils::aggregate(@ARGV) ) {

    # Per-file identifying term; its presence means the file is already in.
    my $uri = "U$file";

    if ( $db->term_exists($uri) ) {
        $verbose and print "$file already in db: skipping ...\n";
        next;
    }

    my $buf = IndexerUtils::normalize( $file, $verbose );

    my $doc = Search::Xapian::Document->new
        or die "can't create doc object for $file: $!\n";

    # Let the term generator tokenize the text and attach the resulting
    # terms to $doc.
    my $analyzer = Search::Xapian::TermGenerator->new;
    $analyzer->set_document($doc);
    $analyzer->index_text($buf);

    # Stored document data: file name followed by the text run through
    # to_utf8() (presumably provided by Search::Tools::UTF8).
    $doc->set_data( "$file: " . to_utf8($buf) );

    # NOTE(review): the slot is named MTIME but receives the current time,
    # not the file's modification time -- confirm which one readers expect.
    $doc->add_value( $SWISH_PROP_MTIME_ID, time() );
    $doc->add_value( $SWISH_PROP_DOCID_ID, "$file" );

    $doc->add_term($uri);

    $db->add_document($doc) or die "failed to add $file: $!";

}

# Write pending changes explicitly rather than relying on the implicit
# commit at object destruction, so a failed write is reported here.
$db->flush;
|---|