| 1 |
#!@@perlbinary@@ -w |
|---|
| 2 |
use strict; |
|---|
| 3 |
|
|---|
| 4 |
# This is set to where Swish-e's "make install" installed the helper modules. |
|---|
| 5 |
use lib ( '@@perlmoduledir@@' ); |
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
use SWISH::Filter; |
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
=pod |
|---|
| 12 |
|
|---|
| 13 |
This is an example of how to use the SWISH::Filter module to filter |
|---|
| 14 |
documents using Swish-e's C<FileFilter> feature. This will filter any |
|---|
| 15 |
number of document types, depending on what filter modules are installed. |
|---|
| 16 |
|
|---|
| 17 |
This program should typically only be used for the -S fs indexing method. |
|---|
| 18 |
For -S http the F<swishspider> program calls SWISH::Filter directly. And -S |
|---|
| 19 |
prog programs written in Perl can also make use of SWISH::Filter directly. |
|---|
| 20 |
|
|---|
| 21 |
In general, you will not want to filter with this program if you have a lot |
|---|
| 22 |
of files to filter. Running a perl program for many documents will be slow |
|---|
| 23 |
(due to the compilation of the perl program). If you have many documents |
|---|
| 24 |
to convert with the -S fs method of indexing then consider using -S prog |
|---|
| 25 |
with F<prog-bin/DirTree.pl> and use the SWISH::Filter module (see |
|---|
| 26 |
F<filters/README>). |
|---|
| 27 |
|
|---|
| 28 |
Swish-e configuration: |
|---|
| 29 |
|
|---|
| 30 |
FileFilter .pdf /path/to/swish_filter.pl |
|---|
| 31 |
FileFilter .doc /path/to/swish_filter.pl |
|---|
| 32 |
FileFilter .mp3 /path/to/swish_filter.pl |
|---|
| 33 |
IndexContents HTML2 .pdf .mp3 |
|---|
| 34 |
IndexContents TXT2 .doc |
|---|
| 35 |
|
|---|
| 36 |
Then when indexing those types of documents this program will attempt to filter (convert) |
|---|
| 37 |
them into a text format. |
|---|
| 38 |
|
|---|
| 39 |
See SWISH-CONFIG documentation on Filtering for more information. |
|---|
| 40 |
|
|---|
| 41 |
=cut |
|---|
| 42 |
|
|---|
| 43 |
|
|---|
| 44 |
# Main program: run one document through SWISH::Filter and write the result
# to STDOUT, with a one-line status report on STDERR.
#
# Command-line arguments (both required; any extra arguments are ignored):
#   1. work path - passed to the filter as the "document" to read
#   2. real path - passed as the document "name" and used for the
#                  content-type lookup
# NOTE(review): presumably the work path is the file Swish-e hands to the
# filter and the real path is the document's original name - confirm against
# the SWISH-CONFIG documentation for FileFilter/FilterOptions.

# Fail early with a clear message instead of handing undef to SWISH::Filter
# and interpolating undef into the status line below.
if ( @ARGV < 2 ) {
    die "Usage: $0 <work_path> <real_path>\n";
}

my ( $work_path, $real_path ) = @ARGV;

my $filter = SWISH::Filter->new;

my $filtered = $filter->filter(
    document     => $work_path,
    name         => $real_path,
    content_type => \$real_path,    # use the real path to lookup the content type
);

if ($filtered) {
    print {*STDERR} " - Filtered: $real_path\n";

    # fetch_doc is dereferenced as a scalar reference to get the content.
    print ${ $filter->fetch_doc };
}
else {
    print {*STDERR} " - Not filtered: $real_path ($work_path)\n";

    # NOTE(review): when nothing was filtered, the real path string itself
    # (not the file's contents) is written to STDOUT - confirm this is the
    # intended fallback.
    print $real_path;
}
|---|
| 58 |
|
|---|
| 59 |
|
|---|
| 60 |
|
|---|
| 61 |
|
|---|
| 62 |
|
|---|