root/libswish3/trunk/perl/xindex.pl

Revision 2113, 1.8 kB (checked in by karpet, 3 months ago)

perl examples for xapian

Line 
#!/usr/bin/perl
#
# example xapian indexer
#
# Reads every path returned by IndexerUtils::aggregate(@ARGV), turns it into
# a text buffer with IndexerUtils::normalize() and stores one Xapian document
# per path in the database named by --index (default: 'xapian_index').
#
# Options:
#   --index <name>   name of the Xapian database to write
#   --verbose        report skipped files; also passed to normalize()
#

use strict;
use warnings;
use Search::Xapian ':all';
use IndexerUtils;
use Getopt::Long;
use Search::Tools::UTF8;

# one usage line, shared by both command-line checks below
my $usage = "$0 --index <name> files_to_index\n";

my $index   = 'xapian_index';
my $verbose = 0;

# GetOptions() returns false when it could not parse the command line
# (it has already warned on STDERR); stop rather than index with options
# the user did not intend.
GetOptions( 'index=s' => \$index, 'verbose' => \$verbose )
    or die $usage;

die $usage unless @ARGV;

# these constants should match libswish3.h
my $SWISH_PROP_MTIME_ID = 5;
my $SWISH_PROP_DOCID_ID = 0;

# NOTE(review): DB_CREATE_OR_OVERWRITE starts from an empty database on
# every run, so the term_exists() check in the loop can only catch a path
# that shows up more than once within this run -- confirm that is intended.
my $db
    = Search::Xapian::WritableDatabase->new( $index, DB_CREATE_OR_OVERWRITE )
    or die "can't create write-able db object: $!\n";

for my $file ( IndexerUtils::aggregate(@ARGV) ) {

    # the path with a 'U' prefix doubles as the document's unique term
    my $uri = "U$file";

    if ( $db->term_exists($uri) ) {

        $verbose and print "$file already in db: skipping ...\n";
        next;

    }

    my $buf = IndexerUtils::normalize( $file, $verbose );

    # NOTE(review): the buffer is indexed exactly as normalize() returned it;
    # only the stored data below goes through to_utf8(). Converting $buf
    # here was tried and left disabled -- confirm the asymmetry is wanted.

    my $doc = Search::Xapian::Document->new
        or die "can't create doc object for $file: $!\n";
    my $analyzer = Search::Xapian::TermGenerator->new;
    $analyzer->set_document($doc);
    $analyzer->index_text($buf);

    # set_data() can be used to store whatever you want
    # but can't be sorted on. so it's like an unsortable property
    $doc->set_data( "$file: " . to_utf8($buf) );

    # add_value() is similar to Swish-e properties
    # results can be sorted by 'value'
    # and each 'value' needs a unique number (like a property id)
    $doc->add_value( $SWISH_PROP_MTIME_ID, time() );    # indexed time
    $doc->add_value( $SWISH_PROP_DOCID_ID, "$file" );

    # add_term() is where you would add a word with no positional info
    # could have META: prefixed however and optional 'weight' as second param
    # we use uri as unique term
    $doc->add_term($uri);

    $db->add_document($doc) or die "failed to add $file: $!";

}

# Commit explicitly. Pending changes would otherwise only be written when
# $db is destroyed, where a failure to write cannot be reported.
$db->flush;
Note: See TracBrowser for help on using the browser.