root/swish-e/branches/2.6/example/search.cgi.in

Revision 1766, 20.3 kB (checked in by augur, 4 years ago)

Perl scripts will now run from "C:\Program Files" and such on Windows.

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1 #!@@perlbinary@@ -w
2 #!/usr/bin/speedy -w
3 package SwishAPISearch;
4 use strict;
5
6 ######################################################################
7 # Skeleton CGI script for searching a Swish-e index with SWISH::API.
8 # see below for documentation or run "perldoc search.cgi"
9 #
10 # Copyright 2003, 2004 Bill Moseley - All rights reserved.
11 #
12 # $Id$
13 #
14 #######################################################################
15
16 use vars '$VERSION';
17 $VERSION = '1.1';
18
19
20 # This needs to be set to where Swish-e installed the Perl modules
21
22 # This is set to where Swish-e's "make install" installed the helper modules.
23 use lib ( '@@perlmoduledir@@' );
24
25 #------------------- Modules --------------------------------------
26 use SWISH::API;             # for searching the index file
27 use SWISH::ParseQuery;      # Parses the query string
28 use SWISH::PhraseHighlight; # for highlighting
29 use CGI;                    # provides a param() method -- could use Apache::Request, for example.
30 use HTML::FillInForm;       # makes the form elements sticky
31 use Template;               # Template-Toolkit: http://tt2.org or see http://search.cpan.org
32
33
34
35
36 #-------------------- Defaults/Parameters --------------------------
37 # Default config settings
38 #
39 # prop_to_meta defines the metas that are used for searching the text displayed
40 # by the given property.  This is only needed when the property name and metaname
41 # do not match up.
42 # prop_to_meta => {
43 #   swishdescription => [ qw/ swishdefault / ],
44 #   swishtitle => [ qw/ swishdefault swishtitle / ],
45 # },
46 # Which says when displaying the swishdescription property use the search words
47 # from the swishdefault metaname (if any) for searching.  And when displaying the
48 # swishtitle property use words from both swishdefault and swishtitle (when
49 # indexing HTML swish indexes the <title> along with the body under the swishdefault
50 # metaname).
51
52
# Package globals: the default configuration, the settings handed to the
# highlighting module, and the cache of per-site instance data that
# persists between requests.
use vars qw/ %config %highlight_settings %site_cache /;

%config = (
    INCLUDE_PATH => [ '@@templatedir@@' ],    # directories searched for templates
    index        => 'index.swish-e',          # path to the index file
    page_size    => 10,                       # number of results per page
    title        => 'Swish Example Search Page',
    template     => 'search.tt',

    # Property name => metanames whose query words are used when
    # highlighting the text of that property.
    prop_to_meta => {
        swishdescription => [ 'swishdefault' ],
        swishtitle       => [ 'swishdefault', 'swishtitle' ],
    },
);


# Settings passed to SWISH::PhraseHighlight->new

%highlight_settings = (
    show_words    => 8,      # number of words to show
    occurrences   => 5,      # presumably the max number of highlighted occurrences
                             # to show -- confirm against SWISH::PhraseHighlight
    max_words     => 100,    # max number of words to show if no highlighted words are found
    highlight_on  => '<span class="highlight">',
    highlight_off => '</span>',
);
77
#--------------------- Code ----------------------------------------
# Entry point for normal CGI programs (anything that is not mod_perl):
# handle the request immediately using the one shared instance, which
# references %config directly instead of copying it.
#
# NOTE(review): this is the last statement executed when the file is
# loaded, so under mod_perl the true value of $ENV{MOD_PERL} appears to
# double as the file's return value for "require".  Confirm before
# turning this into "if ( !... )".

unless ( $ENV{MOD_PERL} ) {

    my $singleton = ( $site_cache{_singleton} ||= { config => \%config } );

    process_request( $singleton );
}
90
91
# Entry point for mod_perl.
#
# Takes the Apache request object.  The first request for each site_id
# (default "_singleton") builds a private deep copy of %config, applies
# any PerlSetVar overrides to it, and caches the result in %site_cache;
# later requests for the same site_id reuse the cached instance.
#
# PerlSetVar options read: site_id, index, page_size, title, template,
# and template_path (prepended to INCLUDE_PATH).
#
# Returns Apache::Constants::OK().
sub handler {
    my $r = shift;

    require Storable;

    my $id = $r->dir_config('site_id') || '_singleton';

    unless ( $site_cache{ $id } ) {

        # Deep copy so one site's overrides never leak into %config
        # or into another site's configuration.
        my $config = Storable::dclone( \%config );

        # Simple scalar overrides.  The names must match the %config keys
        # exactly (this list previously misspelled "template", so that
        # option was silently ignored).
        for my $option ( qw/ index page_size title template / ) {
            my $value = $r->dir_config( $option );
            $config->{$option} = $value if defined $value;
        }

        # An extra template directory is searched before the defaults.
        if ( my $template_path = $r->dir_config('template_path') ) {
            unshift @{ $config->{INCLUDE_PATH} }, $template_path;
        }

        $site_cache{ $id } = { config => $config };
    }

    process_request( $site_cache{ $id } );

    return Apache::Constants::OK();
}
122
123
124
#-------------------------------------------------------------------
# Process a single request.
#
# Takes the per-site instance hash.  It holds {config} and, because it
# persists between requests under mod_perl or SpeedyCGI, also caches
# objects created by run_query() and generate_view().
#
# Reads the CGI parameters query, metaname, page and size, runs the
# search when a query was supplied, renders the template, and prints
# the HTTP header plus the form-filled page to STDOUT.
#-------------------------------------------------------------------


sub process_request {
    my ( $instance ) = @_;  # bad name since it persists between requests

    # Start from a clean slate.  If an earlier request died part way
    # through (e.g. a template error) its request/result would still be
    # stored here, and a request without a query would then render the
    # previous visitor's results.
    delete @{$instance}{ qw/ request result / };

    my $cgi = CGI->new;  # could also be Apache::Request or other fast access to CGI params

    my $config = $instance->{config};

    # The "||" forces scalar context on each param() call, so a repeated
    # parameter cannot inject extra elements into this hash.
    my $request = {
        cgi         => $cgi,
        myself      => $cgi->url(-path=>1),
        query       => $cgi->param('query') || undef,
        metaname    => $cgi->param('metaname') || undef,
        page        => $cgi->param('page') || 1,
        pagesize    => $cgi->param('size') || $config->{page_size} || 10,
        pid         => $$,
    };

    $instance->{request} = $request;


    # If a query was passed in then run the search
    if ( $request->{query} ) {

        #  Limit by metaname
        $request->{swish_query} = $request->{metaname}
            ? "$request->{metaname}=( $request->{query} )"
            : $request->{query};

        $instance->{result} = run_query( $instance );
    }


    # Generate output (a reference to the rendered page)
    my $output = generate_view( $instance );


    # Run output through HTML::FillInForm to make form elements sticky

    my $fill_in_object = HTML::FillInForm->new;
    print $cgi->header;
    print $fill_in_object->fill( scalarref => $output, fobject => $cgi );

    # Clean up the per-request data so it is not kept alive until the next request
    delete @{$instance}{ qw/ request result / };
}
176
177
178
179
# Subroutine to run the Swish query.
# A better design might be to return an object with methods for accessing the data.
#
# Takes the instance hash; reads {config}{index} and, from {request},
# page, pagesize and swish_query.  The SWISH::API object and the
# highlighting object are created on first use and cached in the
# instance as {swish} and {highlight_object}.
#
# Returns a hash reference that is one of:
#   { message => $text }   a non-critical swish error occurred
#   { hits => 0 }          the query matched nothing
#   { results_obj, results, hits, shown, page, start [, prev] [, next] }
#
# Dies if the SWISH::API object cannot be created, or (via
# check_swish_error) when swish reports a critical error.

sub run_query {
    my ( $instance ) = @_;

    my $config = $instance->{config};
    my $request = $instance->{request};

    my $page = $request->{page};
    my $pagesize = $request->{pagesize};

    # Pages are numbered from 1.  A value such as "00" is all digits but
    # would produce a negative seek offset below, so it is rejected too.
    $page = 1 unless defined $page && $page =~ /^\d+$/ && $page > 0;
    $pagesize = 15 unless defined $pagesize && $pagesize =~ /^\d+$/ && $pagesize > 0 && $pagesize < 50;


    # Create the swish object if not cached.
    # Also read in the header data and initialize the highlighting module

    my $swish = $instance->{swish};

    if ( ! $swish ) {
        $swish = SWISH::API->new( $config->{index} );
        die "Failed to create SWISH::API object" unless $swish;
        return { message => check_swish_error( $swish ) } if $swish->Error;

        $instance->{swish} = $swish;  # cache for next request or for template

        # Note, this only works with a single index file
        my %headers = map { lc($_) => ($swish->HeaderValue( $config->{index}, $_ )||'') } $swish->HeaderNames;

        # and cache the highlighting object
        # Note if searching more than one index with differing settings then need one
        # highlight object for each index
        $instance->{highlight_object} = SWISH::PhraseHighlight->new( \%highlight_settings, \%headers, { swish => $swish } );
    }


    # Run the search.  See SWISH::API for more options (like sorting)

    my $results = $swish->Query( $request->{swish_query} );

    return { message => check_swish_error( $swish ) } if $swish->Error;
    return { hits  => 0 } unless $results->Hits;


    # Seek to the first record of the page requested

    my $start = ( $page - 1 ) * $pagesize;

    $results->SeekResult( $start );

    return { message => check_swish_error( $swish ) } if $swish->Error;


    # Store up to one page of result objects in an array

    my @records;
    while ( @records < $pagesize ) {
        my $record = $results->NextResult or last;
        push @records, $record;
    }


    # Return the results structure

    my %result = (
        results_obj => $results,
        results     => \@records,
        hits        => $results->Hits,
        shown       => scalar @records,
        page        => $page,
        start       => $start,
    );

    # Neighbouring page numbers, present only when such a page exists
    $result{prev} = $page-1 if $page > 1;
    $result{next} = $page+1 if $result{start} + $pagesize < $result{hits};

    return \%result;
}
262
263
# Return swish error messages
#
# Takes the swish object.  Returns nothing when no error is flagged.
# Otherwise builds a message from ErrorString and LastErrorMsg, joined
# by a space; dies with that message (newline-terminated) when the
# error is critical, and returns the message when it is not.

sub check_swish_error {
    my ( $swish ) = @_;

    if ( !$swish->Error ) {
        return;
    }

    my @parts = ( $swish->ErrorString, $swish->LastErrorMsg );
    my $message = join ' ', @parts;

    if ( $swish->CriticalError ) {
        die "$message\n";
    }

    return $message;
}
275
276
# This generates the output from the templates
#
# Takes the instance hash.  The Template object is created on first use
# (from the configured INCLUDE_PATH) and cached in the instance.  When
# the current result has hits, the 'highlight' filter is (re)defined
# before the configured template is processed with the instance as its
# variables.
#
# Returns a reference to the rendered text.  Dies when the Template
# object cannot be created or the template fails to process.

sub generate_view {
    my ( $instance ) = @_;

    my $config = $instance->{config};
    my $result = $instance->{result};

    my $template = $instance->{template_object};

    unless ( $template ) {
        $template = Template->new( INCLUDE_PATH => $config->{INCLUDE_PATH} )
            || die $Template::ERROR, "\n";

        $instance->{template_object} = $template;    # cache for later requests
    }


    # Create a highlight filter if any results
    if ( $result->{hits} ) {
        create_highlight_filter( $instance );
    }


    my $template_output;
    $template->process( $config->{template}, $instance, \$template_output )
        or die $template->error;

    return \$template_output;
}
301
# Creates a filter for highlighting search terms
#
# Takes the instance hash.  Parses the words reported by the results
# object for the configured index, stores the parsed query in the result
# as {parsed_query}, and defines a dynamic Template-Toolkit filter named
# 'highlight' on the cached template object's context.

sub create_highlight_filter {
    my ( $instance ) = @_;

    my $result = $instance->{result};
    my $config = $instance->{config};


    # The filter is used in the template to highlight terms.
    # Usage requires passing in the *metaname* associated with the property
    # that's being highlighted -- this allows the program to know what
    # search words to use in highlighting

    my @parsed_words = $result->{results_obj}->ParsedWords( $config->{index} );
    my $parsed_query = parse_query( join ' ', @parsed_words );

    # save for Data::Dumper
    $result->{parsed_query} = $parsed_query;


    # The filter factory: called with the filter's arguments, it returns
    # the sub that does the highlighting of one piece of text.

    my $filter_factory = sub {
        my ( $context, $property_name, $result_object ) = @_;

        # Do we need to map the property name to a metaname?
        my $metas = $config->{prop_to_meta}{$property_name} || [ $property_name ];

        # Collect the query phrases used under those metanames, if any.
        # Might also check for duplicate phrases for a small optimization
        my @phrases =
            map  { @{ $parsed_query->{$_} } }
            grep { $parsed_query->{$_} } @$metas;

        # Longest phrases first
        @phrases = sort { @$b <=> @$a } @phrases;

        return sub {
            my $text = shift;
            $instance->{highlight_object}->highlight( \$text, \@phrases, $property_name, $result_object );
            return $text;
        };
    };


    # Register it; the trailing 1 marks the filter as dynamic (a factory)

    $instance->{template_object}->context->define_filter( 'highlight', $filter_factory, 1 );
}
358 __END__
359
360 =head1 NAME
361
362 search.cgi -- Example Perl program for searching with Swish-e and SWISH::API
363
364 =head1 DESCRIPTION
365
366 This is a very simple program that shows how to use the SWISH::API module
367 in a CGI script or mod_perl handler using Template-Toolkit to generate
368 output.  This program is intended for programmers that want to create a custom
369 search script.
370
371 Unlike F<swish.cgi> this script does not have many features, and provides no
372 external configuration (with the exception of a few config options under
373 mod_perl).  So don't ask why it doesn't do something.  The point is that this
374 script is used as a starting point that YOU customize.
375
376 =head1 REQUIREMENTS
377
378 You must have swish-e and the SWISH::API module installed.  See the README
379 and INSTALL documents in the swish-e distribution.  As of this writing SWISH::API
380 is part of the swish-e distribution, but in the future may be provided as a separate
381 package (provided on the CPAN).  In either case SWISH::API is a separate installation
382 procedure from installing swish-e.  The Storable module is also required if using mod_perl.
383
384 This program does require that some modules are installed from CPAN.
385 You will need Template-Toolkit and HTML::FillInForm (which depends on HTML::Parser).
386 How those are installed depends on your computer's packaging system.
387
388 You will need a web server, obviously.  The discussion below assumes Apache is used.
389 If you are using MS IIS take note that IIS works differently in a number of ways.
390
391 =head1 OVERVIEW
392
393 The F<search.cgi> script and related templates are installed when swish-e is installed.
394 F<search.cgi> is installed in $prefix/lib/swish-e/ and templates are installed
395 in $prefix/share/swish-e/templates/.  $prefix is /usr/local by default
396 but can be changed when running the swish-e F<configure> script.  Upon
397 installation F<search.cgi> is updated with correct paths to your perl binary, the helper Perl modules, and the templates.
398
399 When running as a CGI script F<search.cgi> is copied or symlinked to the location
400 of your CGI scripts (or any directory that allows CGI scripts).  By default,
401 the F<search.cgi> script looks for the index F<index.swish-e> in the current
402 directory (that's what the web server considers the current directory).  On Apache
403 running mod-cgi that's the same place as the script.  On IIS it's not.  If your
404 index is elsewhere you will need to modify the script.
405
406 The script works by parsing the query, calling SWISH::API to run the actual search, and then
407 calling Template-Toolkit to generate the output.
408
409 The script calls the F<search.tt> template.  This template generates the query
410 form and the search results.  The F<search.tt> template uses a
411 Template-Toolkit "WRAPPER" function to wrap the search form and results in your
412 site's design.   This design is in the F<page_layout> template.   The idea is
413 if you use Template-Toolkit to manage your entire site then your entire site
414 would be formatted by the same F<page_layout> template.  The F<page_layout> template
415 calls two other templates F<common_header> and F<common_footer> to generate a common
416 header and footer for the site.  Those are just demonstrating Template-Toolkit's
417 features.
418
419 The F<page_layout> page only defines the basic structure of the site.  The true
420 design of the site is managed by style sheets.  F<style.css> defines the basic
421 layout and F<markup.css> sets fonts and colors. 
422
423 Note: these style sheets are included directly in the output of the CGI script.
424 In production the style sheets would be stored as separate style
425 sheet files and imported by the browser instead of directly included in the
426 search results page.
427
428 See the section MOD_PERL below for more on templates.
429
430 Highlighting of search terms is provided by the SWISH::PhraseHighlight module.
431 That is a very slow module, so you may wish to disable it if you expect a lot
432 of traffic.
433
434
435 =head1 INSTALLATION EXAMPLE
436
437 Enough talking, sometimes it's nice to see a complete example.  Below swish-e
438 is installed in the default location (/usr/local).  The "$" is a normal user
439 prompt, where "#" is a root prompt.  Use ./configure --prefix to install in another
440 location (e.g. if you do not have root access).
441
442 Download and install swish-e
443
444     $ wget -q http://swish-e.org/Download/latest.tar.gz
445     $ tar zxf latest.tar.gz
446     $ cd swish-e-2.x.x
447     $ (./configure && make) >/dev/null
448     $ make check
449     $ su
450     # make install
451     # exit
452
453 Install SWISH::API
454
455     $ cd perl
456     $ perl Makefile.PL && make && make test
457     $ su
458     # make install
459     # exit
460
461 Install required Perl modules.  You can install via RPMs, Debs or directly from the CPAN
462 or by using the CPAN shell.
463
464     $ su
465     # perl -MCPAN -e 'install Template'
466     # perl -MCPAN -e 'install HTML::FillInForm'
467     # exit
468
469 Now setup the script in someplace that allows CGI scripts.
470
471     $ cd $HOME/apache
472     $ ln -s /usr/local/lib/swish-e/search.cgi .
473     $ cat .htaccess
474     deny from all
475     <files search.cgi>
476         allow from all
477         SetHandler cgi-script
478         Options +ExecCGI
479     </files>
480
481 Create an index
482
483     $ cat swish.config
484     IndexOnly .htm .html
485     DefaultContents HTML*
486     StoreDescription HTML* <body>
487     metanames swishtitle swishdocpath
488
489     $ swish-e -c swish.config -i /usr/share/doc/apache-doc/manual
490
491 Test the index and the CGI script:
492
493     $ swish-e -w apache -m1 | grep hits
494     # Number of hits: 152
495
496     $ lynx -dump http://localhost/apache/search.cgi?query=apache | grep hits
497         Showing page 1 (1 - 10 of 152 hits) [3]Next
498               'hits' => 152,
499
500 Now, the above isn't very helpful because the Apache documentation indexed is not
501 in the web space.  You would likely index content available on your web site.
502
503 =head1 Using with SpeedyCGI
504
505 A Perl CGI script must be compiled for each request.  SpeedyCGI is a tool to speed up
506 scripts by running them persistently.  To run F<search.cgi> with SpeedyCGI install
507 the program (you can Google, right?) and then change the first line of F<search.cgi>
508 to run the F<speedy> program.
509
510 For example:
511
512     #!/usr/bin/speedy -w
513
514
515 =head1 Using with MOD_PERL
516
517 This script can be run directly as a mod_perl handler, and the same code can be used
518 to run multiple sites by using separate Location directives and passing in a "site id."
519 The script caches in memory different configurations based on this site id.
520
521 Below is a complete httpd.conf file.  It requires an Apache httpd that has
522 mod_perl compiled in statically.  It runs mod_perl on a high port (port 5000)
523 listening to all interfaces. 
524
525 For testing I put this config file in a directory along with F<search.cgi>, but
526 that's just done to make the example simple (i.e. so I don't have to show any
527 absolute paths).  Normally the httpd.conf and the search.cgi "module" would be
528 in separate locations.
529
530
531     # httpd.conf -- test file for search.cgi as mod_perl handler
532
533     <ifModule mod_so.c>
534         LoadModule mime_module /usr/lib/apache/1.3/mod_mime.so
535     </IfModule>
536
537     ErrorLog swish_error_log
538     PidFile swish_httpd.pid
539
540     Listen *:5000
541
542     <perl>
543         push @PerlSetVar, [
544             index  => Apache->server_root_relative( 'index.swish-e'),
545         ];
546         $DocumentRoot =  Apache->server_root_relative;
547         require "search.cgi";
548     </perl>
549
550     NameVirtualHost *:5000
551     <VirtualHost *:5000>
552
553         ServerName localhost
554
555         <Location /search>
556             SetHandler  perl-script
557             PerlHandler SwishAPISearch
558         </Location>
559
560         <Location /othersite>
561             SetHandler perl-script
562             PerlHandler SwishAPISearch
563             # Define this site
564             PerlSetVar  site_id othersite
565             PerlSetVar  title "Some other Site"
566         </Location>
567
568     </VirtualHost>
569
570 The server is started using this command:
571
572     $ /usr/sbin/apache-perl -d $(pwd) -f $(pwd)/httpd.conf
573
574 which says to use the current directory as the ServerRoot.
575 (See comments below.)  Stop the server like:
576
577     $ kill `cat swish_httpd.pid`
578
579 Then access either:
580
581     http://localhost:5000/search
582     http://localhost:5000/othersite
583
584 A few Notes:
585
586 I like test configurations to not care where things are located.  Thus, the
587 above httpd.conf does a few tricks in the "Perl Section" shown.
588
589 First, mod_perl, unlike CGI, doesn't set the working directory.  So, the index file
590 name must be absolute.  This is accomplished by a PerlSetVar entry building
591 the index file name from the ServerRoot.
592
593 Second, the DocumentRoot is set to the same as the ServerRoot.  The DocumentRoot
594 needs to be set so search.cgi can figure out the path to the script (for
595 creating next and previous links).
596
597 Third, the script is loaded by a C<require> statement.  This works only because
598 the current directory "." is in Perl's @INC path at Apache start up time and
599 F<search.cgi> is also in the current directory.  Normally, set PERL5LIB
600 on server startup or use a "use lib" line in your startup.pl file to point to
601 the location of search.cgi.
602
603 The "PerlSetVar" lines pass config information into the script.  Note that they can
604 be set globally or specific to a given Location.
605
606 The following config options are currently available:
607
608 =over 4
609
610 =item site_id
611
612 The site_id option allows caching of configurations on a per-site basis.
613 It's overkill in this example, but normally you might have expensive configuration
614 processes that you might want to do only once.  But, since there is caching by this id
615 it's a good idea to set a site_id if using more than one Location directive.
616
617 =item index
618
619 This specifies the index file to use.  The index file needs to be absolute
620 as discussed above.  Example:
621
622     PerlSetVar index /usr/share/swish/site.index
623
624 =item title
625
626 This option sets the title that's passed into the template.
627
628 =item template
629
630 Sets the file name of the template used to generate the form.  This might be useful
631 if you want an "advanced" form, for example.
632
633 =item template_path
634
635 This can be used to update the path where templates are searched.  Useful if you wish
636 to override templates.
637
638 =item page_size
639
640 This allows changing the default number of results shown per page.
641
642 =back
643
644
645 =head1 SUPPORT
646
647 Not much support is provided.  But what support is provided is ONLY provided via
648 the Swish-e discussion list.
649
650     http://swish-e.org/
651
652
653 =head1 AUTHOR
654
655 Bill Moseley
656
657 =head1 LICENSE
658
659 Copyright 2003, 2004 Bill Moseley.  All rights reserved.
660
661 This program is free software; you can redistribute it and/or modify it
662 under the same terms as Perl itself.
663
664 =head1 SEE ALSO
665
666 SWISH::API,  Template, HTML::FillInForm
667
668
Note: See TracBrowser for help on using the browser.