root/swish-e/branches/2.6/example/swish.cgi.in

Revision 1830, 105.8 kB (checked in by karman, 2 years ago)

Conlon caught typo

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
<
Line 
1 #!@@perlbinary@@ -w
2 package SwishSearch;
3 use strict;
4
5 # This is set to where Swish-e's "make install" installed the helper modules.
6 use lib ( '@@perlmoduledir@@' );
7
8
9 my $DEFAULT_CONFIG_FILE = '.swishcgi.conf';
10
11 ###################################################################################
12 #
13 #    If this text is displayed on your browser then your web server
14 #    is not configured to run .cgi programs.  Contact your web server administrator.
15 #
16 #    To display documentation for this program type "perldoc swish.cgi"
17 #
18 #    swish.cgi $Revision$ Copyright (C) 2001 Bill Moseley swishscript@hank.org
19 #    Example CGI program for searching with SWISH-E
20 #
21 #    This example program will only run under an OS that supports fork().
22 #    Under windows it uses a piped open which MAY NOT BE SECURE.
23 #
24 #
25 #    This program is free software; you can redistribute it and/or
26 #    modify it under the terms of the GNU General Public License
27 #    as published by the Free Software Foundation; either version
28 #    2 of the License, or (at your option) any later version.
29 #
30 #    This program is distributed in the hope that it will be useful,
31 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
32 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33 #    GNU General Public License for more details.
34 #
35 #    The above lines must remain at the top of this program
36 #
37 #    $Id$
38 #
39 ####################################################################################
40
41 # This is written this way so the script can be used as a CGI script or a mod_perl
42 # module without any code changes.
43
44 # use CGI ();  # might not be needed if using Apache::Request
45
46
47 #=================================================================================
48 #   CGI entry point
49 #
50 #=================================================================================
51
52
53 use vars '$speedy_config';  # Global for caching in persistent environment such as SpeedyCGI
54
55 # Run the script -- entry point if running as a CGI script
56
57     unless ( $ENV{MOD_PERL} ) {
58         if ( !$speedy_config ) {
59             $speedy_config = default_config();
60
61             # Merge with disk config file.
62             $speedy_config = merge_read_config( $speedy_config );
63         }
64
65         process_request( $speedy_config );
66     }
67
68
69
70
71 #==================================================================================
72 #   This sets the default configuration parameters
73 #
74 #   Any configuration read from disk is merged with these settings.
75 #
76 #   Only a few settings are actually required.  Some reasonable defaults are used
77 #   for most.  If fact, you can probably create a complete config as:
78 #
79 #    return = {
80 #        swish_binary    => '/usr/local/bin/swish-e',
81 #        swish_index     => '/usr/local/share/swish/index.swish-e',
82 #        title_property  => 'swishtitle',  # Not required, but recommended
83 #    };
84 #
85 #   But, that doesn't really show all the options.
86 #
87 #   You can modify the options below, or you can use a config file.  The config file
88 #   is .swishcgi.conf by default (read from the current directory) that must return
89 #   a hash reference.  For example, to create a config file that changes the default
90 #   title and index file name, plus uses Template::Toolkit to generate output
91 #   create a config file as:
92 #
93 #       # Example config file -- returns a hash reference
94 #       return {
95 #           title           => 'Search Our Site',
96 #           swish_index     => 'index.web',
97 #
98 #           template => {
99 #               package         => 'SWISH::TemplateToolkit',
100 #               file            => 'swish.tt',
101 #               options         => {
102 #                   INCLUDE_PATH    => '/home/user/swish-e/example',
103 #               },
104 #           },
105 #        };
106 #
107 #
108 #-----------------------------------------------------------------------------------
109
110 sub default_config {
111
112
113
114     ##### Configuration Parameters #########
115
116     #---- This lists all the options, with many commented out ---
117     # By default, this config is used -- see the process_request() call below.
118
119     # You should adjust for your site, and how your swish index was created.
120
121     ##>>
122     ##>>  Please don't post this entire section on the swish-e list if looking for help!
123     ##>>
124     ##>>  Send a small example, without all the comments.
125
126     #======================================================================
127     #                   *** NOTES ****
128     #   Items beginning with an "x" or "#" are commented out
129     #   the "x" form simply renames (hides) that setting.  It's used
130     #   to make it easy to disable a mult-line configuation setting.
131     #
132     #   If you do not understand a setting then best to leave the default.
133     #
134     #   Please follow the documentation (perldoc swish.cgi) and set up
135     #   a test using the defaults before making changes.  It's much easier
136     #   to modify a working example than to try to get a modified example to work...
137     #
138     #   Again, this is a Perl hash structure.  Commas are important.
139     #======================================================================
140
141
142     return {
143         title           => 'Search our site',  # Title of your choice.  Displays on the search page
144         swish_binary    => '@@swishbinary@@',  # Location of swish-e binary
145
146
147         # By default, this script tries to read a config file.  You should probably
148         # comment this out if not used save a disk stat
149         config_file     => $DEFAULT_CONFIG_FILE,    # Default config file
150
151
152         # The location of your index file.  Typically, this would not be in
153         # your web tree.
154         # If you have more than one index to search then specify an array
155         # reference.  e.g. swish_index =>[ qw( index1 index2 index3 )],
156
157         swish_index     => 'index.swish-e',    # Location of your index file
158                                                # See "select_indexes" below for how to
159                                                # select more than one index.
160
161         page_size       => 15,                 # Number of results per page  - default 15
162
163
164
165
166         # prepend this path to the filename (swishdocpath) returned by swish.  This is used to
167         # make the href link back to the original document.  Comment out to disable.
168
169         #prepend_path    => 'http://localhost/mydocs',
170
171
172
173         # This is the property that is used for the href link back to the original
174         # document.  It's "swishdocpath" by default
175
176         #link_property   => 'swishdocpath',
177
178
179         ## Display properties ##
180
181         # Everything swish records about a file is called a "property".  These
182         # next three settings tell the swish.cgi script which properties should be passed
183         # to the templating coded for output generation.
184
185
186         # First is the property name to use as the main link text to the indexed document.
187         # Typically, this will be 'swishtitle' if have indexed html documents,
188         # but you can specify any PropertyName defined in your document.
189         # By default, swish will display the pathname for documents that do not
190         # have a title.
191         # In other words, this is used for the text of the links of the search results.
192         #  <a href="prepend_path/swishdocpath">title_property</a>
193
194         title_property => 'swishtitle',
195
196
197
198         # Swish has a configuration directive "StoreDescription" that will save part or
199         # all of a document's contents in the index file.  This can then be displayed
200         # along with results.  If you are indexing a lot of files this can use a lot of disk
201         # space, so test carefully before indexing your entire site.
202         # Building swish with zlib can greatly reduce the space used by StoreDescription.
203         #
204         # This settings tells this script to display this property as the description.
205         # Normally, this should be 'swishdescription', but you can specify another property name.
206         # There is no default.
207
208         description_prop    => 'swishdescription',
209
210
211
212         # Property names listed here will be displayed in a table below each result
213         # You may wish to modify this list if you are using document properties (PropertyNames)
214         # in your swish-e index configuration
215         # There is no default.
216
217         display_props   => [qw/swishlastmodified swishdocsize swishdocpath/],
218
219
220
221
222
223         # Results can be be sorted by any of the properties listed here
224         # They will be displayed in a drop-down list on the form.
225         # You may modify this list if you are using document properties of your own creation
226         # Swish uses the rank as the default sort
227
228         sorts           => [qw/swishrank swishlastmodified swishtitle swishdocpath/],
229
230
231         # Secondary_sort is used to sort within a sort
232         # You may enter a property name followed by a direction (asc|desc)
233
234         secondary_sort  => [qw/swishlastmodified desc/],
235
236
237
238
239         # You can limit by MetaNames here.  Names listed here will be displayed in
240         # a line of radio buttons.
241         # The default is to not allow any metaname selection.
242         # To use this feature you must define MetaNames while indexing.
243
244         # The special "swishdefault" says to search any text that was not indexed
245         # as a specific metaname (e.g. typically the body of a HTML document and its title).
246
247         # To see how this might work, add to your *swish-e* config file:
248         #   MetaNames swishtitle swishdocpath
249         # reindex and try:
250
251         metanames       => [qw/ swishdefault swishtitle swishdocpath /],
252
253         # Add "all" to this list to test the meta_groups feature described below
254
255
256
257         # Another example: if you indexed an email archive
258         # that defined the metanames subject name email (as in the swish-e discussion archive)
259         # you might use:
260         #metanames       => [qw/body subject name email/],
261
262
263         # Searching multiple meta names:
264
265         # You can also group metanames into "meta-metanames".
266         # Example: Say you defined metanames "author", "comment" and "keywords"
267         # You want to allow searching "author", "comment" and the document body ("swishdefault")
268         # But you would also like an "all" search that searches all metanames, including "keywords":
269         #
270         #   metanames  => [qw/swishdefault author comment all/],
271         #
272         # Now, the "all" metaname is not a real metaname.  It must be expanded into its
273         # individual metanames using meta_groups:
274         #
275         #  "meta_groups" maps a fake metaname to a list of real metanames
276         #
277         #   meta_groups => {
278         #       all => [qw/swishdefault author comment keywords / ],
279         #   },
280         #
281         #  swish.cgi will then take a query like
282         #
283         #       all=(query words)
284         #
285         #  and create the query
286         #
287         #       swishdefault=(query words) OR author=(query words) OR comment=(query words) OR keywords=(query words)
288         #
289         #  This is not ideal, but should work for most cases
290         #  (might fail under windows since the query is passed through the shell).
291
292         # To enable this group add "all" to the list of metanames above
293
294         meta_groups => {
295             all =>  [qw/swishdefault swishtitle swishdocpath/],
296         },
297
298         # Note that you can use other words than "all".  The script just checks if a given metaname is
299         # listed in "meta_groups" and expands as needed.
300
301
302         # "name_labels" is used to map MetaNames and PropertyNames to user-friendly names
303         # on the CGI form.
304
305         name_labels => {
306             swishdefault        => 'Title & Body',
307             swishtitle          => 'Title',
308             swishrank           => 'Rank',
309             swishlastmodified   => 'Last Modified Date',
310             swishdocpath        => 'Document Path',
311             swishdocsize        => 'Document Size',
312             all                 => 'All',              # group of metanames
313             subject             => 'Message Subject',  # other examples
314             name                => "Poster's Name",
315             email               => "Poster's Email",
316             sent                => 'Message Date',
317         },
318
319
320         timeout         => 10,    # limit time used by swish when fetching results - DoS protection.
321                                   # does not work under Windows
322
323         max_query_length => 100,  # limit length of query string.  Swish also has a limit (default is 40)
324                                   # You might want to set swish-e's limit higher, and use this to get a
325                                   # somewhat more friendly message.
326
327
328
329
330
331         max_chars       => 500,   # Limits the size of the description_prop if it is not highlighted
332
333         # This structure defines term highlighting, and what type of highlighting to use
334         # If you are using metanames in your searches and they map to properties that you
335         # will display, you may need to adjust the "meta_to_prop_map".
336
337         highlight       => {
338
339             # Pick highlighting module -- you must make sure the module can be found
340             # The highlighting modules are in the example/modules directory by default
341
342             # Ok speed, but doesn't handle phrases or stopwords
343             # Deals with stemming, and shows words in context
344             # Takes into consideration WordCharacters, IgnoreFirstChars and IgnoreLastChars.
345             #package         => 'SWISH::DefaultHighlight',
346
347             # Somewhat slow, but deals with phases, stopwords, and stemming.
348             # Takes into consideration WordCharacters, IgnoreFirstChars and IgnoreLastChars.
349             package         => 'SWISH::PhraseHighlight',
350
351             # Faster: phrases without regard to wordcharacter settings
352             # doesn't do context display, so must match in first X words, so may not even highlight
353             # doesn't handle stemming or stopwords.
354             #package         => 'SWISH::SimpleHighlight',
355
356             show_words      => 10,    # Number of "swish words" words to show around highlighted word
357             max_words       => 100,   # If no words are found to highlighted then show this many words
358             occurrences     => 6,     # Limit number of occurrences of highlighted words
359             #highlight_on   => '<b>', # HTML highlighting codes
360             #highlight_off  => '</b>',
361             highlight_on    => '<font style="background:#FFFF99">',
362             highlight_off   => '</font>',
363
364             # This maps (real) search metatags to display properties.
365             # e.g. if searching in "swishdefault" then highlight in the
366             # swishtitle and swishdescription properties
367             # Do not include "fake" metanames defined with meta_groups, just
368             # list the real metanames used in your index, and the properties they
369             # relate to.
370
371             meta_to_prop_map => {
372                 swishdefault    => [ qw/swishtitle swishdescription/ ],
373                 swishtitle      => [ qw/swishtitle/ ],
374                 swishdocpath    => [ qw/swishdocpath/ ],
375             },
376         },
377
378
379
380         # If you specify more than one index file (as an array reference) you
381         # can set this allow selection of which indexes to search.
382         # The default is to search all indexes specified if this is not used.
383         # When used, the first index is the default index.
384
385         # You need to specify your indexes as an array reference:
386         #swish_index     => [ qw/ index.swish-e index.other index2.other index3.other index4.other / ],
387
388         Xselect_indexes  => {
389             # pick radio_group, popup_menu, or checkbox_group
390             method  => 'checkbox_group',
391             #method => 'radio_group',
392             #method => 'popup_menu',
393
394             columns => 3,
395             # labels must match up one-to-one with elements in "swish_index"
396             labels  => [ 'Main Index', 'Other Index', qw/ two three four/ ],
397             description => 'Select Site: ',
398
399             # Optional - Set the default index if none is selected
400             # This needs to be an index file name listed in swish_index
401             # above, not a label
402
403             default_index => '',
404         },
405
406
407         # Similar to select_indexes, this adds a metaname search
408         # based on a metaname.  You can use any metaname, and this will
409         # add an "AND" search to limit results to a subset of your records.
410         # i.e. it adds something like  'site=(foo or bar or baz)' if foo, bar, and baz were selected.
411
412         # This really just allows you to limit existing searches by a metaname, instead of
413         # selecting a metaname (with metanames option above).
414
415         # Swish-e's ExtractPath would work well with this.  For example,
416         # to allow limiting searches to specific sections of the apache docs use this
417         # in your swish-e config file:
418         #   ExtractPath site regex !^/usr/local/apache/htdocs/manual/([^/]+)/.+$!$1!
419         #   ExtractPathDefault site other
420         # which extracts the segment of the path after /manual/ and indexes that name
421         # under the metaname "site".  Then searches can be limited to files with that
422         # path (e.g. query would be swishdefault=foo AND site=vhosts to limit searches
423         # to the virtual host section.
424
425
426         Xselect_by_meta  => {
427             #method      => 'radio_group',  # pick: radio_group, popup_menu, or checkbox_group
428             method      => 'checkbox_group',
429             #method      => 'popup_menu',
430             columns     => 3,
431             metaname    => 'site',     # Can't be a metaname used elsewhere!
432             values      => [qw/misc mod vhosts other/],
433             labels  => {
434                 misc    => 'General Apache docs',
435                 mod     => 'Apache Modules',
436                 vhosts  => 'Virtual hosts',
437             },
438             description => 'Limit search to these areas: ',
439         },
440
441
442
443         # The 'template' setting defines what generates the output
444         # The default is "TemplateDefault" which is reasonably ugly,
445         # but does not require installation of a separate templating system.
446
447         # Note that some of the above options may not be available
448         # for templating, as it's up to you to layout the form
449         # and swish-e results in your template.
450
451         # TemplateDefault is the default
452
453         xtemplate => {
454             package     => 'SWISH::TemplateDefault',
455         },
456
457         xtemplate => {
458             package     => 'SWISH::TemplateDumper',
459         },
460
461         xtemplate => {
462             package         => 'SWISH::TemplateToolkit',
463             file            => 'swish.tt',
464             options         => {
465                 INCLUDE_PATH    => '@@pkgdatadir@@',
466                 #PRE_PROCESS     => 'config',
467             },
468         },
469
470         xtemplate => {
471             package         => 'SWISH::TemplateHTMLTemplate',
472             options         => {
473                 filename            => 'swish.tmpl',
474                 path                => '@@pkgdatadir@@',
475                 die_on_bad_params   => 0,
476                 loop_context_vars   => 1,
477                 cache               => 1,
478             },
479         },
480
481
482
483         # The "on_intranet" setting is just a flag that can be used to say you do
484         # not have an external internet connection.  It's here because the default
485         # page generation includes links to images on swish-e.or and on www.w3.org.
486         # If this is set to one then those images will not be shown.
487         # (This only effects the default ouput module SWISH::TemplateDefault)
488
489         on_intranet => 0,
490
491
492
493         # Here you can hard-code debugging options.  The will help you find
494         # where you made your mistake ;)
495         # Using all at once will generate a lot of messages to STDERR
496         # Please see the documentation before using these.
497         # Typically, you will set these from the command line instead of in the configuration.
498
499         # debug_options => 'basic, command, headers, output, summary, dump',
500
501
502
503         # This defines the package object for reading CGI parameters
504         # Defaults to CGI.  Might be useful with mod_perl.
505         # request_package     => 'CGI',
506         # request_package     => 'Apache::Request',
507
508
509         # use_library => 1,  # set true and will use the SWISH::API module
510                            # will cache based on index files when running under mod_perl
511
512
513         # Minor adjustment to page display.  The page navigation normally looks like:
514         #                          Page: 1 5 6 7 8 9 24
515         # where the first page and last page are always displayed.  These can be disabled by
516         # by setting to true values ( 1 )
517
518         no_first_page_navigation   => 0,
519         no_last_page_navigation    => 0,
520         num_pages_to_show          => 12,  # number of pages to offer
521
522
523
524
525         # Limit to date ranges
526
527
528
529         # This adds in the date_range limiting options
530         # You will need the DateRanges.pm module from the author to use that feature
531
532         # Noramlly, you will want to limit by the last modified date, so specify
533         # "swishlastmodified" as the property_name.  If indexing a mail archive, and, for
534         # example, you store the date (a unix timestamp) as "date" then specify
535         # "date" as the property_name.
536
537         date_ranges     => {
538             property_name   => 'swishlastmodified',      # property name to limit by
539
540             # what you specify here depends on the DateRanges.pm module.
541             time_periods    => [
542                 'All',
543                 'Today',
544                 'Yesterday',
545                 #'Yesterday onward',
546                 'This Week',
547                 'Last Week',
548                 'Last 90 Days',
549                 'This Month',
550                 'Last Month',
551                 #'Past',
552                 #'Future',
553                 #'Next 30 Days',
554             ],
555
556             line_break      => 0,
557             default         => 'All',
558             date_range      => 1,
559         },
560
561
562         # This is suppose to reduce the load on systems if hit with a large number
563         # of requests.  Although this will limit the number of swish-e processes run
564         # it will not limit the number of CGI requests.  I feel like a better solution
565         # is to use mod_perl (with the SWISH::API module).
566         # I also think that running /bin/ps for every is not ideal.
567
568         # This only works on unix-based systems when running the swish-e binary.
569         # It greps /swish-e/ from the output of ps and aborts if the count is < limit_procs
570
571         # Set max number of swish-e binaries and ps command to run
572         limit_procs     => 0,  # max number of swish process to run (zero to not limit)
573         ps_prog         => '/bin/ps -Unobody -ocommand',  # command to list number of swish binaries
574
575     };
576
577 }
578
579 #^^^^^^^^^^^^^^^^^^^^^^^^^ end of user config ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
580 #========================================================================================
581
582
583
584 #=================================================================================
585 #   mod_perl entry point
586 #
587 #   As an example, you might use a PerlSetVar to point to paths to different
588 #   config files, and then cache the different configurations by path.
589 #
590 #=================================================================================
591
592 my %cached_configs;
593
594 sub handler {
595     my $r = shift;
596
597     if ( my $config_path = $r->dir_config( 'Swish_Conf_File' ) ) {
598
599         # Already cached?
600         # Note that this is cached for the life of the server -- must restart if want to change config
601
602         if ( $cached_configs{ $config_path } ) {
603             process_request( $cached_configs{ $config_path } );
604             return Apache::Constants::OK();
605         }
606
607
608         # Else, load config
609         my $config = default_config();
610         $config->{config_file} = $config_path;
611
612         # Merge with disk config file.
613         $cached_configs{ $config_path } = merge_read_config( $config );
614
615         process_request( $cached_configs{ $config_path } );
616         return Apache::Constants::OK();
617     }
618
619
620     # Otherwise, use hard-coded config
621     my $config = default_config();
622
623     # Merge with disk config file.
624     $config = merge_read_config( $config );
625
626     process_request( default_config() );
627
628     return Apache::Constants::OK();
629
630 }
631
632
633 #============================================================================
634 #   Read config settings from disk, and merge
635 #   Note, all errors are ignored since by default this script looks for a
636 #   config file.
637 #
638 #============================================================================
639 sub merge_read_config {
640     my $config = shift;
641
642
643     set_default_debug_flags();
644
645     set_debug($config);  # get from config or from %ENV
646
647
648     return $config unless $config->{config_file};
649
650     my $return = do $config->{config_file};  # load the config file
651
652     unless ( ref $return eq 'HASH' ) {
653
654         # First, let's check for file not found for the default config, which we can ignore
655
656         my $error = $@ || $!;
657
658         if ( $config->{config_file} eq $DEFAULT_CONFIG_FILE && !-e $config->{config_file} ) {
659             warn "Config file '$config->{config_file}': $!" if $config->{debug};
660             return $config;
661         }
662
663         die "Config file '$config->{config_file}': $error";
664     }
665
666
667
668     if ( $config->{debug} || $return->{debug} ) {
669         require Data::Dumper;
670         print STDERR "\n---------- Read config parameters from '$config->{config_file}' ------\n",
671               Data::Dumper::Dumper($return),
672               "-------------------------\n";
673     }
674
675     set_debug( $return );
676
677
678     # Merge settings
679     return { %$config, %$return };
680 }
681
682 #--------------------------------------------------------------------------------------------------
683 sub set_default_debug_flags {
684     # Debug flags defined
685
686     $SwishSearch::DEBUG_BASIC       = 1;  # Show command used to run swish
687     $SwishSearch::DEBUG_COMMAND     = 2;  # Show command used to run swish
688     $SwishSearch::DEBUG_HEADERS     = 4;  # Swish output headers
689     $SwishSearch::DEBUG_OUTPUT      = 8;  # Swish output besides headers
690     $SwishSearch::DEBUG_SUMMARY     = 16;  # Summary of results parsed
691     $SwishSearch::DEBUG_RESULTS     = 32;  # Detail of results parsed
692     $SwishSearch::DEBUG_DUMP_DATA   = 64;  # dump data that is sent to templating modules
693 }
694
695
696
697
698 #---------------------------------------------------------------------------------------------------
699 sub set_debug {
700     my $conf = shift;
701
702     $conf->{debug} = 0;
703
704     my $debug_string =  $ENV{SWISH_DEBUG} ||$conf->{debug_options};
705     return unless $debug_string;
706
707
708     my %debug = (
709         basic       => [$SwishSearch::DEBUG_BASIC,   'Basic debugging'],
710         command     => [$SwishSearch::DEBUG_COMMAND, 'Show command used to run swish'],
711         headers     => [$SwishSearch::DEBUG_HEADERS, 'Show headers returned from swish'],
712         output      => [$SwishSearch::DEBUG_OUTPUT,  'Show output from swish'],
713         summary     => [$SwishSearch::DEBUG_SUMMARY, 'Show summary of results'],
714         results     => [$SwishSearch::DEBUG_RESULTS, 'Show detail of results'],
715         dump        => [$SwishSearch::DEBUG_DUMP_DATA, 'Show all data available to templates'],
716     );
717
718
719     $conf->{debug} = 1;
720
721     my @debug_str;
722
723     for ( split /\s*,\s*/, $debug_string ) {
724         if ( exists $debug{ lc $_ } ) {
725             push @debug_str, lc $_;
726             $conf->{debug} |= $debug{ lc $_ }->[0];
727             next;
728         }
729
730         print STDERR "Unknown debug option '$_'.  Must be one of:\n",
731              join( "\n", map { sprintf('  %10s: %10s', $_, $debug{$_}->[1]) } sort { $debug{$a}->[0] <=> $debug{$b}->[0] }keys %debug),
732              "\n\n";
733         exit;
734     }
735
736     print STDERR "Debug level set to: $conf->{debug} [", join( ', ', @debug_str), "]\n";
737 }
738
739
740 #============================================================================
741 #
742 #   This is the main controller (entry point), where a config hash is passed in.
743 #
744 #   Loads the request module (e.g. CGI.pm), and the output module
745 #   Also sets up debugging
746 #
747 #============================================================================
748
749 sub process_request {
750     my $conf = shift;  # configuration parameters
751
752
753
754     # Limit number of requests - questionable value
755     limit_swish( $conf->{limit_procs}, $conf->{ps_prog} )
756         if !$conf->{use_library}
757           && $conf->{limit_procs} && $conf->{limit_procs} =~ /^\d+$/
758           && $conf->{ps_prog};
759
760
761
762     # Set default property used or the href link to the document
763     $conf->{link_property} ||= 'swishdocpath';
764
765     # Use CGI.pm by default
766     my $request_package = $conf->{request_package} || 'CGI';
767
768     load_module( $request_package );
769     my $request_object = $request_package->new;
770
771
772     # load the templating module
773     my $template = $conf->{template} || { package => 'SWISH::TemplateDefault' };
774     load_module( $template->{package} );
775
776
777     # Allow fixup within the config file
778     if ( $conf->{request_fixup} && ref $conf->{request_fixup} eq 'CODE' ) {
779         &{$conf->{request_fixup}}( $request_object, $conf );
780     }
781
782
783     set_debug_input( $conf, $request_object )
784         if $conf->{debug} && !$ENV{GATEWAY_INTERFACE};
785
786
787     # Create search object and build a query based on CGI parameters
788     my $search = SwishQuery->new(
789         config    => $conf,
790         request   => $request_object,
791     );
792
793
794
795
796     # run the query (run if there's a query)
797     $search->run_query;  # currently, results is the just the $search object
798
799     if ( $search->hits ) {
800         $search->set_navigation;  # sets links
801     }
802
803
804
805     show_debug_output( $conf, $search )
806         if $conf->{debug};
807
808
809     $template->{package}->show_template( $template, $search );
810 }
811
812
813 # For limiting number of swish-e binaries
814
815 sub limit_swish {
816     my ( $limit_procs, $ps_prog ) = @_;
817
818
819     my $num_procs = scalar grep { /swish-e/ } `$ps_prog`;
820     return if $num_procs <= $limit_procs;
821
822     warn "swish.cgi - limited due to too many currently running swish-e binaries: $num_procs running is more than $limit_procs\n";
823
824     ## Abort
825     print <<EOF;
826 Status: 503 Too many requests
827
828 <html>
829 <head><title>Too Many Requests</title></head>
830 <body>
831 Too Many Requests -- Try back later
832 </body>
833 </html>
834 EOF
835
836     exit;
837 }
838
839
840
841
842 #============================================================================
843 #
844 #   Loads a perl module -- and shows a pretty web page to say the obvious
845 #
846 #
847 #============================================================================
848 sub load_module {
849     my $package = shift;
850     $package =~ s[::][/]g;
851     eval { require "$package.pm" };
852     if ( $@ ) {
853         print <<EOF;
854 Content-Type: text/html
855
856 <html>
857 <head><title>Software Error</title></head>
858 <body><h2>Software Error</h2><p>Please check error log</p></body>
859 </html>
860 EOF
861
862         die "$0 $@\n";
863     }
864 }
865
866
867
868 #==================================================================
869 # set debugging input
870 #
871 #==================================================================
872
873 sub set_debug_input {
874     my ( $conf, $request_object ) = @_;
875
876     print STDERR 'Enter a query [all]: ';
877     my $query = <STDIN>;
878     $query =~ tr/\r//d;
879     chomp $query;
880     unless ( $query ) {
881         print STDERR "Using 'not asdfghjklzxcv' to match all records\n";
882         $query = 'not asdfghjklzxcv';
883     }
884
885     $request_object->param('query', $query );
886
887     print STDERR 'Enter max results to display [1]: ';
888     my $max = <STDIN>;
889     chomp $max;
890     $max = 1 unless $max && $max =~/^\d+$/;
891
892     $conf->{page_size} = $max;
893 }
894
895 #==================================================================
896 # show debugging output
897 #
898 #==================================================================
899 sub show_debug_output {
900     my ( $conf, $results ) = @_;
901
902     require Data::Dumper;
903
904
905     if ( $results->hits ) {
906         print STDERR "swish.cgi: returned a page of $results->{navigation}{showing} results of $results->{navigation}{hits} total hits\n";
907     } else {
908         print STDERR "swish.cgi: no results\n";
909     }
910
911     if ($conf->{debug} & $SwishSearch::DEBUG_HEADERS ) {
912         print STDERR "\n------------- Index Headers ------------\n";
913         if ( $results->{_headers} ) {
914             print STDERR Data::Dumper::Dumper( $results->{_headers} );
915         } else {
916             print STDERR "No headers\n";
917         }
918
919         print STDERR "--------------------------\n";
920     }
921
922
923
924     if ( $conf->{debug} & $SwishSearch::DEBUG_DUMP_DATA ) {
925         print STDERR "\n------------- Results structure passed to template ------------\n",
926               Data::Dumper::Dumper( $results ),
927               "--------------------------\n";
928
929     } elsif ( $conf->{debug} & $SwishSearch::DEBUG_SUMMARY ) {
930         print STDERR "\n------------- Results summary ------------\n";
931         if ( $results->{hits} ) {
932             print STDERR "$_->{swishrank} $_->{swishdocpath}\n" for @{ $results->{_results}};
933
934         } else {
935             print STDERR "** NO RESULTS **\n";
936         }
937
938     } elsif ( $conf->{debug} & $SwishSearch::DEBUG_RESULTS ) {
939         print STDERR "\n------------- Results detail ------------\n";
940         if ( $results->{hits} ) {
941             print STDERR Data::Dumper::Dumper( $results->{_results} );
942         } else {
943             print STDERR "** NO RESULTS **\n";
944         }
945
946         print STDERR "--------------------------\n";
947     }
948 }
949
950
951
952
953
954
955
956 #==================================================================================================
957 package SwishQuery;
958 #==================================================================================================
959
960 use Carp;
961 # Or use this instead -- PLEASE see perldoc CGI::Carp for details
962 # <opinion>CGI::Carp doesn't help that much</opinion>
963 #use CGI::Carp; # qw(fatalsToBrowser);
964
965 use SWISH::ParseQuery;
966
967
968
969
970 #--------------------------------------------------------------------------------
971 # new() doesn't do much, just create the object
972 #--------------------------------------------------------------------------------
973 sub new {
974     my $class = shift;
975     my %options = @_;
976
977     my $conf = $options{config};
978
979     croak "Failed to set the swish index files in config setting 'swish_index'" unless $conf->{swish_index};
980     croak "Failed to specify 'swish_binary' in configuration" unless $conf->{swish_binary};
981
982     # initialize the request search hash
983     my $sh = {
984        prog         => $conf->{swish_binary},
985        config       => $conf,
986        q            => $options{request},
987        hits         => 0,
988        MOD_PERL     => $ENV{MOD_PERL},
989     };
990
991     my $self = bless $sh, $class;
992
993
994     # load highlight module, if requsted
995
996     if ( my $highlight = $self->config('highlight') ) {
997         $highlight->{package} ||= 'SWISH::DefaultHighlight';
998         SwishSearch::load_module( $highlight->{package} );
999     }
1000
1001
1002     # Fetch the swish-e query from the CGI parameters
1003     $self->set_query;
1004
1005     return $self;
1006 }
1007
1008
1009 sub hits { shift->{hits} }
1010
1011 sub config {
1012     my ($self, $setting, $value ) = @_;
1013
1014     confess "Failed to pass 'config' a setting" unless $setting;
1015
1016     my $cur = $self->{config}{$setting} if exists $self->{config}{$setting};
1017
1018     $self->{config}{$setting} = $value if $value;
1019
1020     return $cur;
1021 }
1022
1023 # Returns false if all of @values are not valid options - for checking
1024 # $config is what $self->config returns
1025
1026 sub is_valid_config_option {
1027     my ( $self, $config, $err_msg, @values ) = @_;
1028
1029     unless ( $config ) {
1030         $self->errstr( "No config option set: $err_msg" );
1031         return;
1032     }
1033
1034     # Allow multiple values.
1035     my @options = ref $config eq 'ARRAY' ? @$config : ( $config );
1036
1037     my %lookup = map { $_ => 1 } @options;
1038
1039     for ( @values ) {
1040         unless ( exists $lookup{ $_ } ) {
1041             $self->errstr( $err_msg );
1042             return;
1043         }
1044     }
1045
1046     return 1;
1047 }
1048
1049
1050 sub header {
1051     my $self = shift;
1052     return unless ref $self->{_headers} eq 'HASH';
1053
1054     return $self->{_headers}{$_[0]} || '';
1055 }
1056
1057
1058 # return a ref to an array
1059 sub results {
1060     my $self = shift;
1061     return $self->{_results} || undef;
1062 }
1063
1064 sub navigation {
1065     my $self = shift;
1066     return unless ref $self->{navigation} eq 'HASH';
1067
1068     return exists $self->{navigation}{$_[0]} ? $self->{navigation}{$_[0]} : '';
1069 }
1070
1071 sub CGI { $_[0]->{q} };
1072
1073
1074
1075
1076 sub swish_command {
1077
1078     my ($self, $param_name, $value ) = @_;
1079
1080     return $self->{swish_command} || {} unless $param_name;
1081     return $self->{swish_command}{$param_name} || '' unless $value;
1082
1083     $self->{swish_command}{$param_name} = $value;
1084 }
1085
1086 # For use when forking
1087
1088 sub swish_command_array {
1089
1090     my ($self ) = @_;
1091
1092     my @params;
1093     my $swish_command = $self->swish_command;
1094
1095     for ( keys %$swish_command ) {
1096
1097         my $value = $swish_command->{$_};
1098
1099         if ( /^-/ ) {
1100             push @params, $_;
1101             push @params, ref $value eq 'ARRAY' ? @$value : $value;
1102             next;
1103         }
1104
1105         # special cases
1106         if ( $_ eq 'limits' ) {
1107             push @params, '-L', $value->{prop}, $value->{low}, $value->{high};
1108             next;
1109         }
1110
1111         die "Unknown swish_command '$_' = '$value'";
1112     }
1113
1114     return @params;
1115
1116 }
1117
1118
1119
1120 sub errstr {
1121     my ($self, $value ) = @_;
1122
1123
1124     $self->{_errstr} = $value if $value;
1125
1126     return $self->{_errstr} || '';
1127 }
1128
1129
1130 #==============================================================================
1131 # Set query from the CGI parameters
1132 #------------------------------------------------------------------------------
1133
1134 sub set_query {
1135     my $self = shift;
1136     my $q = $self->{q};
1137
1138     # Sets the query string, and any -L limits.
1139     return unless $self->build_query;
1140
1141     # Set the starting position (which is offset by one)
1142
1143     my $start = $q->param('start') || 0;
1144     $start = 0 unless $start =~ /^\d+$/ && $start >= 0;
1145
1146     $self->swish_command( '-b', $start+1 );
1147
1148
1149
1150     # Set the max hits
1151
1152     my $page_size = $self->config('page_size') || 15;
1153     $self->swish_command( '-m', $page_size );
1154
1155
1156     return unless $self->set_index_file;
1157
1158
1159     # Set the sort option, if any