root/swish-e/branches/2.6/conf/example9.pl

Revision 221, 1.6 kB (checked in by whmoseley, 8 years ago)

Replace the user.config file with a collection of
simple examples.

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1 #!/usr/local/bin/perl -w
2 use strict;
3
4 # This is a short example that basically does the same
5 # thing as the default file system access method by
6 # recursing directories, but also shows how to process different
7 # file types -- in this example pdf is converted to xml for indexing.
8
9 # in this example, only .pdf and .config files are indexed.
10
11 # the pdf2xml module is in the prog-bin directory of the swish-e distribution
12 use lib '../prog-bin';
13
14 use File::Find;  # for recursing a directory tree
15 use pdf2xml;     # example module for pdf to xml conversion
16                  # Note that you need IndexContents XML .pdf in the
17                  # swish-e config file
18
19 # See perldoc File::Find for information on following symbolic links
20
21 use constant DEBUG => 0;
22
23 # See if a directory was passed in via the SwishProgParameters swish
24 # directive
25
# Directory to index: the first command-line argument, or the current
# directory when none is given.  The argument is tested for definedness and
# length rather than truth, so a directory literally named "0" is honoured
# instead of being silently replaced by '.'.
my $dir = shift;
$dir = '.' unless defined $dir && length $dir;

# Walk the tree, handing every entry to wanted().  With no_chdir set,
# File::Find does not chdir into each directory, so inside wanted() $_ holds
# the same path as $File::Find::name.
find(
    {
        wanted   => \&wanted,
        no_chdir => 1,
    },
    $dir,
);
35
# wanted()
#
# File::Find callback, invoked once for every entry found under the start
# directory.  Directories are ignored.  A name ending in ".pdf" is passed to
# pdf2xml() and the referenced result is printed; a name ending in ".config"
# is passed to get_content() and the referenced result is printed.  Anything
# else is skipped.  When DEBUG is true a progress line is written to STDERR
# for each entry.
sub wanted {
    my $name = $File::Find::name;

    # Nothing to print for a directory entry itself.
    return if -d $_;

    if ( $_ =~ /\.pdf$/ ) {
        print STDERR "Indexing pdf $name\n" if DEBUG;
        my $doc_ref = pdf2xml($name);
        print ${$doc_ref};
    }
    elsif ( $_ =~ /\.config$/ ) {
        print STDERR "Indexing $name\n" if DEBUG;
        my $doc_ref = get_content($name);
        print ${$doc_ref};
    }
    else {
        print STDERR "Skipping $name\n" if DEBUG;
    }
}
51
52
# get_content( $path )
#
# Reads a file and returns it wrapped in the header block this script prints
# ahead of each document: Content-Length, Last-Mtime and Path-Name lines, a
# blank line, and then the file's contents.
#
#   $path - name of the file to read
#
# Returns a reference to a scalar holding the headers followed by the content.
# Dies with "$path: <system error>" if the file cannot be opened or closed.
sub get_content {
    my $path = shift;

    # Three-argument open with a lexical handle: the path is used verbatim
    # (no mode characters, pipes, or whitespace trimming are interpreted from
    # it) and the handle is private to this call rather than a shared global.
    open my $fh, '<', $path or die "$path: $!";

    # stat the handle we just opened so size and mtime describe the very file
    # whose contents are read below.
    my ( $size, $mtime ) = ( stat $fh )[ 7, 9 ];

    my $content = <<EOF;
Content-Length: $size
Last-Mtime: $mtime
Path-Name: $path

EOF

    # Slurp the whole file in one read; $/ is restored when the block exits.
    $content .= do { local $/ = undef; <$fh> };

    close $fh or die "$path: $!";

    return \$content;
}
Note: See TracBrowser for help on using the browser.