| 1 |
#!@@perlbinary@@ -w |
|---|
| 2 |
package SwishSearch; |
|---|
| 3 |
use strict; |
|---|
| 4 |
|
|---|
| 5 |
# This is set to where Swish-e's "make install" installed the helper modules. |
|---|
| 6 |
use lib ( '@@perlmoduledir@@' ); |
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
my $DEFAULT_CONFIG_FILE = '.swishcgi.conf'; |
|---|
| 10 |
|
|---|
| 11 |
################################################################################### |
|---|
| 12 |
# |
|---|
| 13 |
# If this text is displayed on your browser then your web server |
|---|
| 14 |
# is not configured to run .cgi programs. Contact your web server administrator. |
|---|
| 15 |
# |
|---|
| 16 |
# To display documentation for this program type "perldoc swish.cgi" |
|---|
| 17 |
# |
|---|
| 18 |
# swish.cgi $Revision$ Copyright (C) 2001 Bill Moseley swishscript@hank.org |
|---|
| 19 |
# Example CGI program for searching with SWISH-E |
|---|
| 20 |
# |
|---|
| 21 |
# This example program will only run under an OS that supports fork(). |
|---|
| 22 |
# Under windows it uses a piped open which MAY NOT BE SECURE. |
|---|
| 23 |
# |
|---|
| 24 |
# |
|---|
| 25 |
# This program is free software; you can redistribute it and/or |
|---|
| 26 |
# modify it under the terms of the GNU General Public License |
|---|
| 27 |
# as published by the Free Software Foundation; either version |
|---|
| 28 |
# 2 of the License, or (at your option) any later version. |
|---|
| 29 |
# |
|---|
| 30 |
# This program is distributed in the hope that it will be useful, |
|---|
| 31 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 32 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 33 |
# GNU General Public License for more details. |
|---|
| 34 |
# |
|---|
| 35 |
# The above lines must remain at the top of this program |
|---|
| 36 |
# |
|---|
| 37 |
# $Id$ |
|---|
| 38 |
# |
|---|
| 39 |
#################################################################################### |
|---|
| 40 |
|
|---|
| 41 |
# This is written this way so the script can be used as a CGI script or a mod_perl |
|---|
| 42 |
# module without any code changes. |
|---|
| 43 |
|
|---|
| 44 |
# use CGI (); # might not be needed if using Apache::Request |
|---|
| 45 |
|
|---|
| 46 |
|
|---|
| 47 |
#================================================================================= |
|---|
| 48 |
# CGI entry point |
|---|
| 49 |
# |
|---|
| 50 |
#================================================================================= |
|---|
| 51 |
|
|---|
| 52 |
|
|---|
| 53 |
use vars '$speedy_config'; # Global for caching in persistent environment such as SpeedyCGI |
|---|
| 54 |
|
|---|
| 55 |
# Run the script -- entry point if running as a CGI script |
|---|
| 56 |
|
|---|
| 57 |
unless ( $ENV{MOD_PERL} ) { |
|---|
| 58 |
if ( !$speedy_config ) { |
|---|
| 59 |
$speedy_config = default_config(); |
|---|
| 60 |
|
|---|
| 61 |
# Merge with disk config file. |
|---|
| 62 |
$speedy_config = merge_read_config( $speedy_config ); |
|---|
| 63 |
} |
|---|
| 64 |
|
|---|
| 65 |
process_request( $speedy_config ); |
|---|
| 66 |
} |
|---|
| 67 |
|
|---|
| 68 |
|
|---|
| 69 |
|
|---|
| 70 |
|
|---|
| 71 |
#================================================================================== |
|---|
| 72 |
# This sets the default configuration parameters |
|---|
| 73 |
# |
|---|
| 74 |
# Any configuration read from disk is merged with these settings. |
|---|
| 75 |
# |
|---|
| 76 |
# Only a few settings are actually required. Some reasonable defaults are used |
|---|
| 77 |
# for most. If fact, you can probably create a complete config as: |
|---|
| 78 |
# |
|---|
| 79 |
# return = { |
|---|
| 80 |
# swish_binary => '/usr/local/bin/swish-e', |
|---|
| 81 |
# swish_index => '/usr/local/share/swish/index.swish-e', |
|---|
| 82 |
# title_property => 'swishtitle', # Not required, but recommended |
|---|
| 83 |
# }; |
|---|
| 84 |
# |
|---|
| 85 |
# But, that doesn't really show all the options. |
|---|
| 86 |
# |
|---|
| 87 |
# You can modify the options below, or you can use a config file. The config file |
|---|
| 88 |
# is .swishcgi.conf by default (read from the current directory) that must return |
|---|
| 89 |
# a hash reference. For example, to create a config file that changes the default |
|---|
| 90 |
# title and index file name, plus uses Template::Toolkit to generate output |
|---|
| 91 |
# create a config file as: |
|---|
| 92 |
# |
|---|
| 93 |
# # Example config file -- returns a hash reference |
|---|
| 94 |
# return { |
|---|
| 95 |
# title => 'Search Our Site', |
|---|
| 96 |
# swish_index => 'index.web', |
|---|
| 97 |
# |
|---|
| 98 |
# template => { |
|---|
| 99 |
# package => 'SWISH::TemplateToolkit', |
|---|
| 100 |
# file => 'swish.tt', |
|---|
| 101 |
# options => { |
|---|
| 102 |
# INCLUDE_PATH => '/home/user/swish-e/example', |
|---|
| 103 |
# }, |
|---|
| 104 |
# }, |
|---|
| 105 |
# }; |
|---|
| 106 |
# |
|---|
| 107 |
# |
|---|
| 108 |
#----------------------------------------------------------------------------------- |
|---|
| 109 |
|
|---|
| 110 |
sub default_config { |
|---|
| 111 |
|
|---|
| 112 |
|
|---|
| 113 |
|
|---|
| 114 |
##### Configuration Parameters ######### |
|---|
| 115 |
|
|---|
| 116 |
#---- This lists all the options, with many commented out --- |
|---|
| 117 |
# By default, this config is used -- see the process_request() call below. |
|---|
| 118 |
|
|---|
| 119 |
# You should adjust for your site, and how your swish index was created. |
|---|
| 120 |
|
|---|
| 121 |
##>> |
|---|
| 122 |
##>> Please don't post this entire section on the swish-e list if looking for help! |
|---|
| 123 |
##>> |
|---|
| 124 |
##>> Send a small example, without all the comments. |
|---|
| 125 |
|
|---|
| 126 |
#====================================================================== |
|---|
| 127 |
# *** NOTES **** |
|---|
| 128 |
# Items beginning with an "x" or "#" are commented out |
|---|
| 129 |
# the "x" form simply renames (hides) that setting. It's used |
|---|
| 130 |
# to make it easy to disable a mult-line configuation setting. |
|---|
| 131 |
# |
|---|
| 132 |
# If you do not understand a setting then best to leave the default. |
|---|
| 133 |
# |
|---|
| 134 |
# Please follow the documentation (perldoc swish.cgi) and set up |
|---|
| 135 |
# a test using the defaults before making changes. It's much easier |
|---|
| 136 |
# to modify a working example than to try to get a modified example to work... |
|---|
| 137 |
# |
|---|
| 138 |
# Again, this is a Perl hash structure. Commas are important. |
|---|
| 139 |
#====================================================================== |
|---|
| 140 |
|
|---|
| 141 |
|
|---|
| 142 |
return { |
|---|
| 143 |
title => 'Search our site', # Title of your choice. Displays on the search page |
|---|
| 144 |
swish_binary => '@@swishbinary@@', # Location of swish-e binary |
|---|
| 145 |
|
|---|
| 146 |
|
|---|
| 147 |
# By default, this script tries to read a config file. You should probably |
|---|
| 148 |
# comment this out if not used save a disk stat |
|---|
| 149 |
config_file => $DEFAULT_CONFIG_FILE, # Default config file |
|---|
| 150 |
|
|---|
| 151 |
|
|---|
| 152 |
# The location of your index file. Typically, this would not be in |
|---|
| 153 |
# your web tree. |
|---|
| 154 |
# If you have more than one index to search then specify an array |
|---|
| 155 |
# reference. e.g. swish_index =>[ qw( index1 index2 index3 )], |
|---|
| 156 |
|
|---|
| 157 |
swish_index => 'index.swish-e', # Location of your index file |
|---|
| 158 |
# See "select_indexes" below for how to |
|---|
| 159 |
# select more than one index. |
|---|
| 160 |
|
|---|
| 161 |
page_size => 15, # Number of results per page - default 15 |
|---|
| 162 |
|
|---|
| 163 |
|
|---|
| 164 |
|
|---|
| 165 |
|
|---|
| 166 |
# prepend this path to the filename (swishdocpath) returned by swish. This is used to |
|---|
| 167 |
# make the href link back to the original document. Comment out to disable. |
|---|
| 168 |
|
|---|
| 169 |
#prepend_path => 'http://localhost/mydocs', |
|---|
| 170 |
|
|---|
| 171 |
|
|---|
| 172 |
|
|---|
| 173 |
# This is the property that is used for the href link back to the original |
|---|
| 174 |
# document. It's "swishdocpath" by default |
|---|
| 175 |
|
|---|
| 176 |
#link_property => 'swishdocpath', |
|---|
| 177 |
|
|---|
| 178 |
|
|---|
| 179 |
## Display properties ## |
|---|
| 180 |
|
|---|
| 181 |
# Everything swish records about a file is called a "property". These |
|---|
| 182 |
# next three settings tell the swish.cgi script which properties should be passed |
|---|
| 183 |
# to the templating coded for output generation. |
|---|
| 184 |
|
|---|
| 185 |
|
|---|
| 186 |
# First is the property name to use as the main link text to the indexed document. |
|---|
| 187 |
# Typically, this will be 'swishtitle' if have indexed html documents, |
|---|
| 188 |
# but you can specify any PropertyName defined in your document. |
|---|
| 189 |
# By default, swish will display the pathname for documents that do not |
|---|
| 190 |
# have a title. |
|---|
| 191 |
# In other words, this is used for the text of the links of the search results. |
|---|
| 192 |
# <a href="prepend_path/swishdocpath">title_property</a> |
|---|
| 193 |
|
|---|
| 194 |
title_property => 'swishtitle', |
|---|
| 195 |
|
|---|
| 196 |
|
|---|
| 197 |
|
|---|
| 198 |
# Swish has a configuration directive "StoreDescription" that will save part or |
|---|
| 199 |
# all of a document's contents in the index file. This can then be displayed |
|---|
| 200 |
# along with results. If you are indexing a lot of files this can use a lot of disk |
|---|
| 201 |
# space, so test carefully before indexing your entire site. |
|---|
| 202 |
# Building swish with zlib can greatly reduce the space used by StoreDescription. |
|---|
| 203 |
# |
|---|
| 204 |
# This settings tells this script to display this property as the description. |
|---|
| 205 |
# Normally, this should be 'swishdescription', but you can specify another property name. |
|---|
| 206 |
# There is no default. |
|---|
| 207 |
|
|---|
| 208 |
description_prop => 'swishdescription', |
|---|
| 209 |
|
|---|
| 210 |
|
|---|
| 211 |
|
|---|
| 212 |
# Property names listed here will be displayed in a table below each result |
|---|
| 213 |
# You may wish to modify this list if you are using document properties (PropertyNames) |
|---|
| 214 |
# in your swish-e index configuration |
|---|
| 215 |
# There is no default. |
|---|
| 216 |
|
|---|
| 217 |
display_props => [qw/swishlastmodified swishdocsize swishdocpath/], |
|---|
| 218 |
|
|---|
| 219 |
|
|---|
| 220 |
|
|---|
| 221 |
|
|---|
| 222 |
|
|---|
| 223 |
# Results can be be sorted by any of the properties listed here |
|---|
| 224 |
# They will be displayed in a drop-down list on the form. |
|---|
| 225 |
# You may modify this list if you are using document properties of your own creation |
|---|
| 226 |
# Swish uses the rank as the default sort |
|---|
| 227 |
|
|---|
| 228 |
sorts => [qw/swishrank swishlastmodified swishtitle swishdocpath/], |
|---|
| 229 |
|
|---|
| 230 |
|
|---|
| 231 |
# Secondary_sort is used to sort within a sort |
|---|
| 232 |
# You may enter a property name followed by a direction (asc|desc) |
|---|
| 233 |
|
|---|
| 234 |
secondary_sort => [qw/swishlastmodified desc/], |
|---|
| 235 |
|
|---|
| 236 |
|
|---|
| 237 |
|
|---|
| 238 |
|
|---|
| 239 |
# You can limit by MetaNames here. Names listed here will be displayed in |
|---|
| 240 |
# a line of radio buttons. |
|---|
| 241 |
# The default is to not allow any metaname selection. |
|---|
| 242 |
# To use this feature you must define MetaNames while indexing. |
|---|
| 243 |
|
|---|
| 244 |
# The special "swishdefault" says to search any text that was not indexed |
|---|
| 245 |
# as a specific metaname (e.g. typically the body of a HTML document and its title). |
|---|
| 246 |
|
|---|
| 247 |
# To see how this might work, add to your *swish-e* config file: |
|---|
| 248 |
# MetaNames swishtitle swishdocpath |
|---|
| 249 |
# reindex and try: |
|---|
| 250 |
|
|---|
| 251 |
metanames => [qw/ swishdefault swishtitle swishdocpath /], |
|---|
| 252 |
|
|---|
| 253 |
# Add "all" to this list to test the meta_groups feature described below |
|---|
| 254 |
|
|---|
| 255 |
|
|---|
| 256 |
|
|---|
| 257 |
# Another example: if you indexed an email archive |
|---|
| 258 |
# that defined the metanames subject name email (as in the swish-e discussion archive) |
|---|
| 259 |
# you might use: |
|---|
| 260 |
#metanames => [qw/body subject name email/], |
|---|
| 261 |
|
|---|
| 262 |
|
|---|
| 263 |
# Searching multiple meta names: |
|---|
| 264 |
|
|---|
| 265 |
# You can also group metanames into "meta-metanames". |
|---|
| 266 |
# Example: Say you defined metanames "author", "comment" and "keywords" |
|---|
| 267 |
# You want to allow searching "author", "comment" and the document body ("swishdefault") |
|---|
| 268 |
# But you would also like an "all" search that searches all metanames, including "keywords": |
|---|
| 269 |
# |
|---|
| 270 |
# metanames => [qw/swishdefault author comment all/], |
|---|
| 271 |
# |
|---|
| 272 |
# Now, the "all" metaname is not a real metaname. It must be expanded into its |
|---|
| 273 |
# individual metanames using meta_groups: |
|---|
| 274 |
# |
|---|
| 275 |
# "meta_groups" maps a fake metaname to a list of real metanames |
|---|
| 276 |
# |
|---|
| 277 |
# meta_groups => { |
|---|
| 278 |
# all => [qw/swishdefault author comment keywords / ], |
|---|
| 279 |
# }, |
|---|
| 280 |
# |
|---|
| 281 |
# swish.cgi will then take a query like |
|---|
| 282 |
# |
|---|
| 283 |
# all=(query words) |
|---|
| 284 |
# |
|---|
| 285 |
# and create the query |
|---|
| 286 |
# |
|---|
| 287 |
# swishdefault=(query words) OR author=(query words) OR comment=(query words) OR keywords=(query words) |
|---|
| 288 |
# |
|---|
| 289 |
# This is not ideal, but should work for most cases |
|---|
| 290 |
# (might fail under windows since the query is passed through the shell). |
|---|
| 291 |
|
|---|
| 292 |
# To enable this group add "all" to the list of metanames above |
|---|
| 293 |
|
|---|
| 294 |
meta_groups => { |
|---|
| 295 |
all => [qw/swishdefault swishtitle swishdocpath/], |
|---|
| 296 |
}, |
|---|
| 297 |
|
|---|
| 298 |
# Note that you can use other words than "all". The script just checks if a given metaname is |
|---|
| 299 |
# listed in "meta_groups" and expands as needed. |
|---|
| 300 |
|
|---|
| 301 |
|
|---|
| 302 |
# "name_labels" is used to map MetaNames and PropertyNames to user-friendly names |
|---|
| 303 |
# on the CGI form. |
|---|
| 304 |
|
|---|
| 305 |
name_labels => { |
|---|
| 306 |
swishdefault => 'Title & Body', |
|---|
| 307 |
swishtitle => 'Title', |
|---|
| 308 |
swishrank => 'Rank', |
|---|
| 309 |
swishlastmodified => 'Last Modified Date', |
|---|
| 310 |
swishdocpath => 'Document Path', |
|---|
| 311 |
swishdocsize => 'Document Size', |
|---|
| 312 |
all => 'All', # group of metanames |
|---|
| 313 |
subject => 'Message Subject', # other examples |
|---|
| 314 |
name => "Poster's Name", |
|---|
| 315 |
email => "Poster's Email", |
|---|
| 316 |
sent => 'Message Date', |
|---|
| 317 |
}, |
|---|
| 318 |
|
|---|
| 319 |
|
|---|
| 320 |
timeout => 10, # limit time used by swish when fetching results - DoS protection. |
|---|
| 321 |
# does not work under Windows |
|---|
| 322 |
|
|---|
| 323 |
max_query_length => 100, # limit length of query string. Swish also has a limit (default is 40) |
|---|
| 324 |
# You might want to set swish-e's limit higher, and use this to get a |
|---|
| 325 |
# somewhat more friendly message. |
|---|
| 326 |
|
|---|
| 327 |
|
|---|
| 328 |
|
|---|
| 329 |
|
|---|
| 330 |
|
|---|
| 331 |
max_chars => 500, # Limits the size of the description_prop if it is not highlighted |
|---|
| 332 |
|
|---|
| 333 |
# This structure defines term highlighting, and what type of highlighting to use |
|---|
| 334 |
# If you are using metanames in your searches and they map to properties that you |
|---|
| 335 |
# will display, you may need to adjust the "meta_to_prop_map". |
|---|
| 336 |
|
|---|
| 337 |
highlight => { |
|---|
| 338 |
|
|---|
| 339 |
# Pick highlighting module -- you must make sure the module can be found |
|---|
| 340 |
# The highlighting modules are in the example/modules directory by default |
|---|
| 341 |
|
|---|
| 342 |
# Ok speed, but doesn't handle phrases or stopwords |
|---|
| 343 |
# Deals with stemming, and shows words in context |
|---|
| 344 |
# Takes into consideration WordCharacters, IgnoreFirstChars and IgnoreLastChars. |
|---|
| 345 |
#package => 'SWISH::DefaultHighlight', |
|---|
| 346 |
|
|---|
| 347 |
# Somewhat slow, but deals with phases, stopwords, and stemming. |
|---|
| 348 |
# Takes into consideration WordCharacters, IgnoreFirstChars and IgnoreLastChars. |
|---|
| 349 |
package => 'SWISH::PhraseHighlight', |
|---|
| 350 |
|
|---|
| 351 |
# Faster: phrases without regard to wordcharacter settings |
|---|
| 352 |
# doesn't do context display, so must match in first X words, so may not even highlight |
|---|
| 353 |
# doesn't handle stemming or stopwords. |
|---|
| 354 |
#package => 'SWISH::SimpleHighlight', |
|---|
| 355 |
|
|---|
| 356 |
show_words => 10, # Number of "swish words" words to show around highlighted word |
|---|
| 357 |
max_words => 100, # If no words are found to highlighted then show this many words |
|---|
| 358 |
occurrences => 6, # Limit number of occurrences of highlighted words |
|---|
| 359 |
#highlight_on => '<b>', # HTML highlighting codes |
|---|
| 360 |
#highlight_off => '</b>', |
|---|
| 361 |
highlight_on => '<font style="background:#FFFF99">', |
|---|
| 362 |
highlight_off => '</font>', |
|---|
| 363 |
|
|---|
| 364 |
# This maps (real) search metatags to display properties. |
|---|
| 365 |
# e.g. if searching in "swishdefault" then highlight in the |
|---|
| 366 |
# swishtitle and swishdescription properties |
|---|
| 367 |
# Do not include "fake" metanames defined with meta_groups, just |
|---|
| 368 |
# list the real metanames used in your index, and the properties they |
|---|
| 369 |
# relate to. |
|---|
| 370 |
|
|---|
| 371 |
meta_to_prop_map => { |
|---|
| 372 |
swishdefault => [ qw/swishtitle swishdescription/ ], |
|---|
| 373 |
swishtitle => [ qw/swishtitle/ ], |
|---|
| 374 |
swishdocpath => [ qw/swishdocpath/ ], |
|---|
| 375 |
}, |
|---|
| 376 |
}, |
|---|
| 377 |
|
|---|
| 378 |
|
|---|
| 379 |
|
|---|
| 380 |
# If you specify more than one index file (as an array reference) you |
|---|
| 381 |
# can set this allow selection of which indexes to search. |
|---|
| 382 |
# The default is to search all indexes specified if this is not used. |
|---|
| 383 |
# When used, the first index is the default index. |
|---|
| 384 |
|
|---|
| 385 |
# You need to specify your indexes as an array reference: |
|---|
| 386 |
#swish_index => [ qw/ index.swish-e index.other index2.other index3.other index4.other / ], |
|---|
| 387 |
|
|---|
| 388 |
Xselect_indexes => { |
|---|
| 389 |
# pick radio_group, popup_menu, or checkbox_group |
|---|
| 390 |
method => 'checkbox_group', |
|---|
| 391 |
#method => 'radio_group', |
|---|
| 392 |
#method => 'popup_menu', |
|---|
| 393 |
|
|---|
| 394 |
columns => 3, |
|---|
| 395 |
# labels must match up one-to-one with elements in "swish_index" |
|---|
| 396 |
labels => [ 'Main Index', 'Other Index', qw/ two three four/ ], |
|---|
| 397 |
description => 'Select Site: ', |
|---|
| 398 |
|
|---|
| 399 |
# Optional - Set the default index if none is selected |
|---|
| 400 |
# This needs to be an index file name listed in swish_index |
|---|
| 401 |
# above, not a label |
|---|
| 402 |
|
|---|
| 403 |
default_index => '', |
|---|
| 404 |
}, |
|---|
| 405 |
|
|---|
| 406 |
|
|---|
| 407 |
# Similar to select_indexes, this adds a metaname search |
|---|
| 408 |
# based on a metaname. You can use any metaname, and this will |
|---|
| 409 |
# add an "AND" search to limit results to a subset of your records. |
|---|
| 410 |
# i.e. it adds something like 'site=(foo or bar or baz)' if foo, bar, and baz were selected. |
|---|
| 411 |
|
|---|
| 412 |
# This really just allows you to limit existing searches by a metaname, instead of |
|---|
| 413 |
# selecting a metaname (with metanames option above). |
|---|
| 414 |
|
|---|
| 415 |
# Swish-e's ExtractPath would work well with this. For example, |
|---|
| 416 |
# to allow limiting searches to specific sections of the apache docs use this |
|---|
| 417 |
# in your swish-e config file: |
|---|
| 418 |
# ExtractPath site regex !^/usr/local/apache/htdocs/manual/([^/]+)/.+$!$1! |
|---|
| 419 |
# ExtractPathDefault site other |
|---|
| 420 |
# which extracts the segment of the path after /manual/ and indexes that name |
|---|
| 421 |
# under the metaname "site". Then searches can be limited to files with that |
|---|
| 422 |
# path (e.g. query would be swishdefault=foo AND site=vhosts to limit searches |
|---|
| 423 |
# to the virtual host section. |
|---|
| 424 |
|
|---|
| 425 |
|
|---|
| 426 |
Xselect_by_meta => { |
|---|
| 427 |
#method => 'radio_group', # pick: radio_group, popup_menu, or checkbox_group |
|---|
| 428 |
method => 'checkbox_group', |
|---|
| 429 |
#method => 'popup_menu', |
|---|
| 430 |
columns => 3, |
|---|
| 431 |
metaname => 'site', # Can't be a metaname used elsewhere! |
|---|
| 432 |
values => [qw/misc mod vhosts other/], |
|---|
| 433 |
labels => { |
|---|
| 434 |
misc => 'General Apache docs', |
|---|
| 435 |
mod => 'Apache Modules', |
|---|
| 436 |
vhosts => 'Virtual hosts', |
|---|
| 437 |
}, |
|---|
| 438 |
description => 'Limit search to these areas: ', |
|---|
| 439 |
}, |
|---|
| 440 |
|
|---|
| 441 |
|
|---|
| 442 |
|
|---|
| 443 |
# The 'template' setting defines what generates the output |
|---|
| 444 |
# The default is "TemplateDefault" which is reasonably ugly, |
|---|
| 445 |
# but does not require installation of a separate templating system. |
|---|
| 446 |
|
|---|
| 447 |
# Note that some of the above options may not be available |
|---|
| 448 |
# for templating, as it's up to you to layout the form |
|---|
| 449 |
# and swish-e results in your template. |
|---|
| 450 |
|
|---|
| 451 |
# TemplateDefault is the default |
|---|
| 452 |
|
|---|
| 453 |
xtemplate => { |
|---|
| 454 |
package => 'SWISH::TemplateDefault', |
|---|
| 455 |
}, |
|---|
| 456 |
|
|---|
| 457 |
xtemplate => { |
|---|
| 458 |
package => 'SWISH::TemplateDumper', |
|---|
| 459 |
}, |
|---|
| 460 |
|
|---|
| 461 |
xtemplate => { |
|---|
| 462 |
package => 'SWISH::TemplateToolkit', |
|---|
| 463 |
file => 'swish.tt', |
|---|
| 464 |
options => { |
|---|
| 465 |
INCLUDE_PATH => '@@pkgdatadir@@', |
|---|
| 466 |
#PRE_PROCESS => 'config', |
|---|
| 467 |
}, |
|---|
| 468 |
}, |
|---|
| 469 |
|
|---|
| 470 |
xtemplate => { |
|---|
| 471 |
package => 'SWISH::TemplateHTMLTemplate', |
|---|
| 472 |
options => { |
|---|
| 473 |
filename => 'swish.tmpl', |
|---|
| 474 |
path => '@@pkgdatadir@@', |
|---|
| 475 |
die_on_bad_params => 0, |
|---|
| 476 |
loop_context_vars => 1, |
|---|
| 477 |
cache => 1, |
|---|
| 478 |
}, |
|---|
| 479 |
}, |
|---|
| 480 |
|
|---|
| 481 |
|
|---|
| 482 |
|
|---|
| 483 |
# The "on_intranet" setting is just a flag that can be used to say you do |
|---|
| 484 |
# not have an external internet connection. It's here because the default |
|---|
| 485 |
# page generation includes links to images on swish-e.or and on www.w3.org. |
|---|
| 486 |
# If this is set to one then those images will not be shown. |
|---|
| 487 |
# (This only effects the default ouput module SWISH::TemplateDefault) |
|---|
| 488 |
|
|---|
| 489 |
on_intranet => 0, |
|---|
| 490 |
|
|---|
| 491 |
|
|---|
| 492 |
|
|---|
| 493 |
# Here you can hard-code debugging options. The will help you find |
|---|
| 494 |
# where you made your mistake ;) |
|---|
| 495 |
# Using all at once will generate a lot of messages to STDERR |
|---|
| 496 |
# Please see the documentation before using these. |
|---|
| 497 |
# Typically, you will set these from the command line instead of in the configuration. |
|---|
| 498 |
|
|---|
| 499 |
# debug_options => 'basic, command, headers, output, summary, dump', |
|---|
| 500 |
|
|---|
| 501 |
|
|---|
| 502 |
|
|---|
| 503 |
# This defines the package object for reading CGI parameters |
|---|
| 504 |
# Defaults to CGI. Might be useful with mod_perl. |
|---|
| 505 |
# request_package => 'CGI', |
|---|
| 506 |
# request_package => 'Apache::Request', |
|---|
| 507 |
|
|---|
| 508 |
|
|---|
| 509 |
# use_library => 1, # set true and will use the SWISH::API module |
|---|
| 510 |
# will cache based on index files when running under mod_perl |
|---|
| 511 |
|
|---|
| 512 |
|
|---|
| 513 |
# Minor adjustment to page display. The page navigation normally looks like: |
|---|
| 514 |
# Page: 1 5 6 7 8 9 24 |
|---|
| 515 |
# where the first page and last page are always displayed. These can be disabled by |
|---|
| 516 |
# by setting to true values ( 1 ) |
|---|
| 517 |
|
|---|
| 518 |
no_first_page_navigation => 0, |
|---|
| 519 |
no_last_page_navigation => 0, |
|---|
| 520 |
num_pages_to_show => 12, # number of pages to offer |
|---|
| 521 |
|
|---|
| 522 |
|
|---|
| 523 |
|
|---|
| 524 |
|
|---|
| 525 |
# Limit to date ranges |
|---|
| 526 |
|
|---|
| 527 |
|
|---|
| 528 |
|
|---|
| 529 |
# This adds in the date_range limiting options |
|---|
| 530 |
# You will need the DateRanges.pm module from the author to use that feature |
|---|
| 531 |
|
|---|
| 532 |
# Noramlly, you will want to limit by the last modified date, so specify |
|---|
| 533 |
# "swishlastmodified" as the property_name. If indexing a mail archive, and, for |
|---|
| 534 |
# example, you store the date (a unix timestamp) as "date" then specify |
|---|
| 535 |
# "date" as the property_name. |
|---|
| 536 |
|
|---|
| 537 |
date_ranges => { |
|---|
| 538 |
property_name => 'swishlastmodified', # property name to limit by |
|---|
| 539 |
|
|---|
| 540 |
# what you specify here depends on the DateRanges.pm module. |
|---|
| 541 |
time_periods => [ |
|---|
| 542 |
'All', |
|---|
| 543 |
'Today', |
|---|
| 544 |
'Yesterday', |
|---|
| 545 |
#'Yesterday onward', |
|---|
| 546 |
'This Week', |
|---|
| 547 |
'Last Week', |
|---|
| 548 |
'Last 90 Days', |
|---|
| 549 |
'This Month', |
|---|
| 550 |
'Last Month', |
|---|
| 551 |
#'Past', |
|---|
| 552 |
#'Future', |
|---|
| 553 |
#'Next 30 Days', |
|---|
| 554 |
], |
|---|
| 555 |
|
|---|
| 556 |
line_break => 0, |
|---|
| 557 |
default => 'All', |
|---|
| 558 |
date_range => 1, |
|---|
| 559 |
}, |
|---|
| 560 |
|
|---|
| 561 |
|
|---|
| 562 |
# This is suppose to reduce the load on systems if hit with a large number |
|---|
| 563 |
# of requests. Although this will limit the number of swish-e processes run |
|---|
| 564 |
# it will not limit the number of CGI requests. I feel like a better solution |
|---|
| 565 |
# is to use mod_perl (with the SWISH::API module). |
|---|
| 566 |
# I also think that running /bin/ps for every is not ideal. |
|---|
| 567 |
|
|---|
| 568 |
# This only works on unix-based systems when running the swish-e binary. |
|---|
| 569 |
# It greps /swish-e/ from the output of ps and aborts if the count is < limit_procs |
|---|
| 570 |
|
|---|
| 571 |
# Set max number of swish-e binaries and ps command to run |
|---|
| 572 |
limit_procs => 0, # max number of swish process to run (zero to not limit) |
|---|
| 573 |
ps_prog => '/bin/ps -Unobody -ocommand', # command to list number of swish binaries |
|---|
| 574 |
|
|---|
| 575 |
}; |
|---|
| 576 |
|
|---|
| 577 |
} |
|---|
| 578 |
|
|---|
| 579 |
#^^^^^^^^^^^^^^^^^^^^^^^^^ end of user config ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|---|
| 580 |
#======================================================================================== |
|---|
| 581 |
|
|---|
| 582 |
|
|---|
| 583 |
|
|---|
| 584 |
#================================================================================= |
|---|
| 585 |
# mod_perl entry point |
|---|
| 586 |
# |
|---|
| 587 |
# As an example, you might use a PerlSetVar to point to paths to different |
|---|
| 588 |
# config files, and then cache the different configurations by path. |
|---|
| 589 |
# |
|---|
| 590 |
#================================================================================= |
|---|
| 591 |
|
|---|
| 592 |
my %cached_configs; |
|---|
| 593 |
|
|---|
| 594 |
sub handler { |
|---|
| 595 |
my $r = shift; |
|---|
| 596 |
|
|---|
| 597 |
if ( my $config_path = $r->dir_config( 'Swish_Conf_File' ) ) { |
|---|
| 598 |
|
|---|
| 599 |
# Already cached? |
|---|
| 600 |
# Note that this is cached for the life of the server -- must restart if want to change config |
|---|
| 601 |
|
|---|
| 602 |
if ( $cached_configs{ $config_path } ) { |
|---|
| 603 |
process_request( $cached_configs{ $config_path } ); |
|---|
| 604 |
return Apache::Constants::OK(); |
|---|
| 605 |
} |
|---|
| 606 |
|
|---|
| 607 |
|
|---|
| 608 |
# Else, load config |
|---|
| 609 |
my $config = default_config(); |
|---|
| 610 |
$config->{config_file} = $config_path; |
|---|
| 611 |
|
|---|
| 612 |
# Merge with disk config file. |
|---|
| 613 |
$cached_configs{ $config_path } = merge_read_config( $config ); |
|---|
| 614 |
|
|---|
| 615 |
process_request( $cached_configs{ $config_path } ); |
|---|
| 616 |
return Apache::Constants::OK(); |
|---|
| 617 |
} |
|---|
| 618 |
|
|---|
| 619 |
|
|---|
| 620 |
# Otherwise, use hard-coded config |
|---|
| 621 |
my $config = default_config(); |
|---|
| 622 |
|
|---|
| 623 |
# Merge with disk config file. |
|---|
| 624 |
$config = merge_read_config( $config ); |
|---|
| 625 |
|
|---|
| 626 |
process_request( default_config() ); |
|---|
| 627 |
|
|---|
| 628 |
return Apache::Constants::OK(); |
|---|
| 629 |
|
|---|
| 630 |
} |
|---|
| 631 |
|
|---|
| 632 |
|
|---|
| 633 |
#============================================================================ |
|---|
| 634 |
# Read config settings from disk, and merge |
|---|
| 635 |
# Note, all errors are ignored since by default this script looks for a |
|---|
| 636 |
# config file. |
|---|
| 637 |
# |
|---|
| 638 |
#============================================================================ |
|---|
| 639 |
sub merge_read_config { |
|---|
| 640 |
my $config = shift; |
|---|
| 641 |
|
|---|
| 642 |
|
|---|
| 643 |
set_default_debug_flags(); |
|---|
| 644 |
|
|---|
| 645 |
set_debug($config); # get from config or from %ENV |
|---|
| 646 |
|
|---|
| 647 |
|
|---|
| 648 |
return $config unless $config->{config_file}; |
|---|
| 649 |
|
|---|
| 650 |
my $return = do $config->{config_file}; # load the config file |
|---|
| 651 |
|
|---|
| 652 |
unless ( ref $return eq 'HASH' ) { |
|---|
| 653 |
|
|---|
| 654 |
# First, let's check for file not found for the default config, which we can ignore |
|---|
| 655 |
|
|---|
| 656 |
my $error = $@ || $!; |
|---|
| 657 |
|
|---|
| 658 |
if ( $config->{config_file} eq $DEFAULT_CONFIG_FILE && !-e $config->{config_file} ) { |
|---|
| 659 |
warn "Config file '$config->{config_file}': $!" if $config->{debug}; |
|---|
| 660 |
return $config; |
|---|
| 661 |
} |
|---|
| 662 |
|
|---|
| 663 |
die "Config file '$config->{config_file}': $error"; |
|---|
| 664 |
} |
|---|
| 665 |
|
|---|
| 666 |
|
|---|
| 667 |
|
|---|
| 668 |
if ( $config->{debug} || $return->{debug} ) { |
|---|
| 669 |
require Data::Dumper; |
|---|
| 670 |
print STDERR "\n---------- Read config parameters from '$config->{config_file}' ------\n", |
|---|
| 671 |
Data::Dumper::Dumper($return), |
|---|
| 672 |
"-------------------------\n"; |
|---|
| 673 |
} |
|---|
| 674 |
|
|---|
| 675 |
set_debug( $return ); |
|---|
| 676 |
|
|---|
| 677 |
|
|---|
| 678 |
# Merge settings |
|---|
| 679 |
return { %$config, %$return }; |
|---|
| 680 |
} |
|---|
| 681 |
|
|---|
| 682 |
#-------------------------------------------------------------------------------------------------- |
|---|
| 683 |
sub set_default_debug_flags { |
|---|
| 684 |
# Debug flags defined |
|---|
| 685 |
|
|---|
| 686 |
$SwishSearch::DEBUG_BASIC = 1; # Show command used to run swish |
|---|
| 687 |
$SwishSearch::DEBUG_COMMAND = 2; # Show command used to run swish |
|---|
| 688 |
$SwishSearch::DEBUG_HEADERS = 4; # Swish output headers |
|---|
| 689 |
$SwishSearch::DEBUG_OUTPUT = 8; # Swish output besides headers |
|---|
| 690 |
$SwishSearch::DEBUG_SUMMARY = 16; # Summary of results parsed |
|---|
| 691 |
$SwishSearch::DEBUG_RESULTS = 32; # Detail of results parsed |
|---|
| 692 |
$SwishSearch::DEBUG_DUMP_DATA = 64; # dump data that is sent to templating modules |
|---|
| 693 |
} |
|---|
| 694 |
|
|---|
| 695 |
|
|---|
| 696 |
|
|---|
| 697 |
|
|---|
| 698 |
#--------------------------------------------------------------------------------------------------- |
|---|
| 699 |
sub set_debug { |
|---|
| 700 |
my $conf = shift; |
|---|
| 701 |
|
|---|
| 702 |
$conf->{debug} = 0; |
|---|
| 703 |
|
|---|
| 704 |
my $debug_string = $ENV{SWISH_DEBUG} ||$conf->{debug_options}; |
|---|
| 705 |
return unless $debug_string; |
|---|
| 706 |
|
|---|
| 707 |
|
|---|
| 708 |
my %debug = ( |
|---|
| 709 |
basic => [$SwishSearch::DEBUG_BASIC, 'Basic debugging'], |
|---|
| 710 |
command => [$SwishSearch::DEBUG_COMMAND, 'Show command used to run swish'], |
|---|
| 711 |
headers => [$SwishSearch::DEBUG_HEADERS, 'Show headers returned from swish'], |
|---|
| 712 |
output => [$SwishSearch::DEBUG_OUTPUT, 'Show output from swish'], |
|---|
| 713 |
summary => [$SwishSearch::DEBUG_SUMMARY, 'Show summary of results'], |
|---|
| 714 |
results => [$SwishSearch::DEBUG_RESULTS, 'Show detail of results'], |
|---|
| 715 |
dump => [$SwishSearch::DEBUG_DUMP_DATA, 'Show all data available to templates'], |
|---|
| 716 |
); |
|---|
| 717 |
|
|---|
| 718 |
|
|---|
| 719 |
$conf->{debug} = 1; |
|---|
| 720 |
|
|---|
| 721 |
my @debug_str; |
|---|
| 722 |
|
|---|
| 723 |
for ( split /\s*,\s*/, $debug_string ) { |
|---|
| 724 |
if ( exists $debug{ lc $_ } ) { |
|---|
| 725 |
push @debug_str, lc $_; |
|---|
| 726 |
$conf->{debug} |= $debug{ lc $_ }->[0]; |
|---|
| 727 |
next; |
|---|
| 728 |
} |
|---|
| 729 |
|
|---|
| 730 |
print STDERR "Unknown debug option '$_'. Must be one of:\n", |
|---|
| 731 |
join( "\n", map { sprintf(' %10s: %10s', $_, $debug{$_}->[1]) } sort { $debug{$a}->[0] <=> $debug{$b}->[0] }keys %debug), |
|---|
| 732 |
"\n\n"; |
|---|
| 733 |
exit; |
|---|
| 734 |
} |
|---|
| 735 |
|
|---|
| 736 |
print STDERR "Debug level set to: $conf->{debug} [", join( ', ', @debug_str), "]\n"; |
|---|
| 737 |
} |
|---|
| 738 |
|
|---|
| 739 |
|
|---|
| 740 |
#============================================================================ |
|---|
| 741 |
# |
|---|
| 742 |
# This is the main controller (entry point), where a config hash is passed in. |
|---|
| 743 |
# |
|---|
| 744 |
# Loads the request module (e.g. CGI.pm), and the output module |
|---|
| 745 |
# Also sets up debugging |
|---|
| 746 |
# |
|---|
| 747 |
#============================================================================ |
|---|
| 748 |
|
|---|
| 749 |
sub process_request { |
|---|
| 750 |
my $conf = shift; # configuration parameters |
|---|
| 751 |
|
|---|
| 752 |
|
|---|
| 753 |
|
|---|
| 754 |
# Limit number of requests - questionable value |
|---|
| 755 |
limit_swish( $conf->{limit_procs}, $conf->{ps_prog} ) |
|---|
| 756 |
if !$conf->{use_library} |
|---|
| 757 |
&& $conf->{limit_procs} && $conf->{limit_procs} =~ /^\d+$/ |
|---|
| 758 |
&& $conf->{ps_prog}; |
|---|
| 759 |
|
|---|
| 760 |
|
|---|
| 761 |
|
|---|
| 762 |
# Set default property used or the href link to the document |
|---|
| 763 |
$conf->{link_property} ||= 'swishdocpath'; |
|---|
| 764 |
|
|---|
| 765 |
# Use CGI.pm by default |
|---|
| 766 |
my $request_package = $conf->{request_package} || 'CGI'; |
|---|
| 767 |
|
|---|
| 768 |
load_module( $request_package ); |
|---|
| 769 |
my $request_object = $request_package->new; |
|---|
| 770 |
|
|---|
| 771 |
|
|---|
| 772 |
# load the templating module |
|---|
| 773 |
my $template = $conf->{template} || { package => 'SWISH::TemplateDefault' }; |
|---|
| 774 |
load_module( $template->{package} ); |
|---|
| 775 |
|
|---|
| 776 |
|
|---|
| 777 |
# Allow fixup within the config file |
|---|
| 778 |
if ( $conf->{request_fixup} && ref $conf->{request_fixup} eq 'CODE' ) { |
|---|
| 779 |
&{$conf->{request_fixup}}( $request_object, $conf ); |
|---|
| 780 |
} |
|---|
| 781 |
|
|---|
| 782 |
|
|---|
| 783 |
set_debug_input( $conf, $request_object ) |
|---|
| 784 |
if $conf->{debug} && !$ENV{GATEWAY_INTERFACE}; |
|---|
| 785 |
|
|---|
| 786 |
|
|---|
| 787 |
# Create search object and build a query based on CGI parameters |
|---|
| 788 |
my $search = SwishQuery->new( |
|---|
| 789 |
config => $conf, |
|---|
| 790 |
request => $request_object, |
|---|
| 791 |
); |
|---|
| 792 |
|
|---|
| 793 |
|
|---|
| 794 |
|
|---|
| 795 |
|
|---|
| 796 |
# run the query (run if there's a query) |
|---|
| 797 |
$search->run_query; # currently, results is the just the $search object |
|---|
| 798 |
|
|---|
| 799 |
if ( $search->hits ) { |
|---|
| 800 |
$search->set_navigation; # sets links |
|---|
| 801 |
} |
|---|
| 802 |
|
|---|
| 803 |
|
|---|
| 804 |
|
|---|
| 805 |
show_debug_output( $conf, $search ) |
|---|
| 806 |
if $conf->{debug}; |
|---|
| 807 |
|
|---|
| 808 |
|
|---|
| 809 |
$template->{package}->show_template( $template, $search ); |
|---|
| 810 |
} |
|---|
| 811 |
|
|---|
| 812 |
|
|---|
| 813 |
# For limiting number of swish-e binaries |
|---|
| 814 |
|
|---|
| 815 |
sub limit_swish { |
|---|
| 816 |
my ( $limit_procs, $ps_prog ) = @_; |
|---|
| 817 |
|
|---|
| 818 |
|
|---|
| 819 |
my $num_procs = scalar grep { /swish-e/ } `$ps_prog`; |
|---|
| 820 |
return if $num_procs <= $limit_procs; |
|---|
| 821 |
|
|---|
| 822 |
warn "swish.cgi - limited due to too many currently running swish-e binaries: $num_procs running is more than $limit_procs\n"; |
|---|
| 823 |
|
|---|
| 824 |
## Abort |
|---|
| 825 |
print <<EOF; |
|---|
| 826 |
Status: 503 Too many requests |
|---|
| 827 |
|
|---|
| 828 |
<html> |
|---|
| 829 |
<head><title>Too Many Requests</title></head> |
|---|
| 830 |
<body> |
|---|
| 831 |
Too Many Requests -- Try back later |
|---|
| 832 |
</body> |
|---|
| 833 |
</html> |
|---|
| 834 |
EOF |
|---|
| 835 |
|
|---|
| 836 |
exit; |
|---|
| 837 |
} |
|---|
| 838 |
|
|---|
| 839 |
|
|---|
| 840 |
|
|---|
| 841 |
|
|---|
| 842 |
#============================================================================ |
|---|
| 843 |
# |
|---|
| 844 |
# Loads a perl module -- and shows a pretty web page to say the obvious |
|---|
| 845 |
# |
|---|
| 846 |
# |
|---|
| 847 |
#============================================================================ |
|---|
| 848 |
sub load_module { |
|---|
| 849 |
my $package = shift; |
|---|
| 850 |
$package =~ s[::][/]g; |
|---|
| 851 |
eval { require "$package.pm" }; |
|---|
| 852 |
if ( $@ ) { |
|---|
| 853 |
print <<EOF; |
|---|
| 854 |
Content-Type: text/html |
|---|
| 855 |
|
|---|
| 856 |
<html> |
|---|
| 857 |
<head><title>Software Error</title></head> |
|---|
| 858 |
<body><h2>Software Error</h2><p>Please check error log</p></body> |
|---|
| 859 |
</html> |
|---|
| 860 |
EOF |
|---|
| 861 |
|
|---|
| 862 |
die "$0 $@\n"; |
|---|
| 863 |
} |
|---|
| 864 |
} |
|---|
| 865 |
|
|---|
| 866 |
|
|---|
| 867 |
|
|---|
| 868 |
#================================================================== |
|---|
| 869 |
# set debugging input |
|---|
| 870 |
# |
|---|
| 871 |
#================================================================== |
|---|
| 872 |
|
|---|
| 873 |
sub set_debug_input { |
|---|
| 874 |
my ( $conf, $request_object ) = @_; |
|---|
| 875 |
|
|---|
| 876 |
print STDERR 'Enter a query [all]: '; |
|---|
| 877 |
my $query = <STDIN>; |
|---|
| 878 |
$query =~ tr/\r//d; |
|---|
| 879 |
chomp $query; |
|---|
| 880 |
unless ( $query ) { |
|---|
| 881 |
print STDERR "Using 'not asdfghjklzxcv' to match all records\n"; |
|---|
| 882 |
$query = 'not asdfghjklzxcv'; |
|---|
| 883 |
} |
|---|
| 884 |
|
|---|
| 885 |
$request_object->param('query', $query ); |
|---|
| 886 |
|
|---|
| 887 |
print STDERR 'Enter max results to display [1]: '; |
|---|
| 888 |
my $max = <STDIN>; |
|---|
| 889 |
chomp $max; |
|---|
| 890 |
$max = 1 unless $max && $max =~/^\d+$/; |
|---|
| 891 |
|
|---|
| 892 |
$conf->{page_size} = $max; |
|---|
| 893 |
} |
|---|
| 894 |
|
|---|
| 895 |
#================================================================== |
|---|
| 896 |
# show debugging output |
|---|
| 897 |
# |
|---|
| 898 |
#================================================================== |
|---|
| 899 |
sub show_debug_output { |
|---|
| 900 |
my ( $conf, $results ) = @_; |
|---|
| 901 |
|
|---|
| 902 |
require Data::Dumper; |
|---|
| 903 |
|
|---|
| 904 |
|
|---|
| 905 |
if ( $results->hits ) { |
|---|
| 906 |
print STDERR "swish.cgi: returned a page of $results->{navigation}{showing} results of $results->{navigation}{hits} total hits\n"; |
|---|
| 907 |
} else { |
|---|
| 908 |
print STDERR "swish.cgi: no results\n"; |
|---|
| 909 |
} |
|---|
| 910 |
|
|---|
| 911 |
if ($conf->{debug} & $SwishSearch::DEBUG_HEADERS ) { |
|---|
| 912 |
print STDERR "\n------------- Index Headers ------------\n"; |
|---|
| 913 |
if ( $results->{_headers} ) { |
|---|
| 914 |
print STDERR Data::Dumper::Dumper( $results->{_headers} ); |
|---|
| 915 |
} else { |
|---|
| 916 |
print STDERR "No headers\n"; |
|---|
| 917 |
} |
|---|
| 918 |
|
|---|
| 919 |
print STDERR "--------------------------\n"; |
|---|
| 920 |
} |
|---|
| 921 |
|
|---|
| 922 |
|
|---|
| 923 |
|
|---|
| 924 |
if ( $conf->{debug} & $SwishSearch::DEBUG_DUMP_DATA ) { |
|---|
| 925 |
print STDERR "\n------------- Results structure passed to template ------------\n", |
|---|
| 926 |
Data::Dumper::Dumper( $results ), |
|---|
| 927 |
"--------------------------\n"; |
|---|
| 928 |
|
|---|
| 929 |
} elsif ( $conf->{debug} & $SwishSearch::DEBUG_SUMMARY ) { |
|---|
| 930 |
print STDERR "\n------------- Results summary ------------\n"; |
|---|
| 931 |
if ( $results->{hits} ) { |
|---|
| 932 |
print STDERR "$_->{swishrank} $_->{swishdocpath}\n" for @{ $results->{_results}}; |
|---|
| 933 |
|
|---|
| 934 |
} else { |
|---|
| 935 |
print STDERR "** NO RESULTS **\n"; |
|---|
| 936 |
} |
|---|
| 937 |
|
|---|
| 938 |
} elsif ( $conf->{debug} & $SwishSearch::DEBUG_RESULTS ) { |
|---|
| 939 |
print STDERR "\n------------- Results detail ------------\n"; |
|---|
| 940 |
if ( $results->{hits} ) { |
|---|
| 941 |
print STDERR Data::Dumper::Dumper( $results->{_results} ); |
|---|
| 942 |
} else { |
|---|
| 943 |
print STDERR "** NO RESULTS **\n"; |
|---|
| 944 |
} |
|---|
| 945 |
|
|---|
| 946 |
print STDERR "--------------------------\n"; |
|---|
| 947 |
} |
|---|
| 948 |
} |
|---|
| 949 |
|
|---|
| 950 |
|
|---|
| 951 |
|
|---|
| 952 |
|
|---|
| 953 |
|
|---|
| 954 |
|
|---|
| 955 |
|
|---|
| 956 |
#================================================================================================== |
|---|
| 957 |
package SwishQuery; |
|---|
| 958 |
#================================================================================================== |
|---|
| 959 |
|
|---|
| 960 |
use Carp; |
|---|
| 961 |
# Or use this instead -- PLEASE see perldoc CGI::Carp for details |
|---|
| 962 |
# <opinion>CGI::Carp doesn't help that much</opinion> |
|---|
| 963 |
#use CGI::Carp; # qw(fatalsToBrowser); |
|---|
| 964 |
|
|---|
| 965 |
use SWISH::ParseQuery; |
|---|
| 966 |
|
|---|
| 967 |
|
|---|
| 968 |
|
|---|
| 969 |
|
|---|
| 970 |
#-------------------------------------------------------------------------------- |
|---|
| 971 |
# new() doesn't do much, just create the object |
|---|
| 972 |
#-------------------------------------------------------------------------------- |
|---|
| 973 |
sub new { |
|---|
| 974 |
my $class = shift; |
|---|
| 975 |
my %options = @_; |
|---|
| 976 |
|
|---|
| 977 |
my $conf = $options{config}; |
|---|
| 978 |
|
|---|
| 979 |
croak "Failed to set the swish index files in config setting 'swish_index'" unless $conf->{swish_index}; |
|---|
| 980 |
croak "Failed to specify 'swish_binary' in configuration" unless $conf->{swish_binary}; |
|---|
| 981 |
|
|---|
| 982 |
# initialize the request search hash |
|---|
| 983 |
my $sh = { |
|---|
| 984 |
prog => $conf->{swish_binary}, |
|---|
| 985 |
config => $conf, |
|---|
| 986 |
q => $options{request}, |
|---|
| 987 |
hits => 0, |
|---|
| 988 |
MOD_PERL => $ENV{MOD_PERL}, |
|---|
| 989 |
}; |
|---|
| 990 |
|
|---|
| 991 |
my $self = bless $sh, $class; |
|---|
| 992 |
|
|---|
| 993 |
|
|---|
| 994 |
# load highlight module, if requsted |
|---|
| 995 |
|
|---|
| 996 |
if ( my $highlight = $self->config('highlight') ) { |
|---|
| 997 |
$highlight->{package} ||= 'SWISH::DefaultHighlight'; |
|---|
| 998 |
SwishSearch::load_module( $highlight->{package} ); |
|---|
| 999 |
} |
|---|
| 1000 |
|
|---|
| 1001 |
|
|---|
| 1002 |
# Fetch the swish-e query from the CGI parameters |
|---|
| 1003 |
$self->set_query; |
|---|
| 1004 |
|
|---|
| 1005 |
return $self; |
|---|
| 1006 |
} |
|---|
| 1007 |
|
|---|
| 1008 |
|
|---|
| 1009 |
sub hits { shift->{hits} } |
|---|
| 1010 |
|
|---|
| 1011 |
sub config { |
|---|
| 1012 |
my ($self, $setting, $value ) = @_; |
|---|
| 1013 |
|
|---|
| 1014 |
confess "Failed to pass 'config' a setting" unless $setting; |
|---|
| 1015 |
|
|---|
| 1016 |
my $cur = $self->{config}{$setting} if exists $self->{config}{$setting}; |
|---|
| 1017 |
|
|---|
| 1018 |
$self->{config}{$setting} = $value if $value; |
|---|
| 1019 |
|
|---|
| 1020 |
return $cur; |
|---|
| 1021 |
} |
|---|
| 1022 |
|
|---|
| 1023 |
# Returns false if all of @values are not valid options - for checking |
|---|
| 1024 |
# $config is what $self->config returns |
|---|
| 1025 |
|
|---|
| 1026 |
sub is_valid_config_option { |
|---|
| 1027 |
my ( $self, $config, $err_msg, @values ) = @_; |
|---|
| 1028 |
|
|---|
| 1029 |
unless ( $config ) { |
|---|
| 1030 |
$self->errstr( "No config option set: $err_msg" ); |
|---|
| 1031 |
return; |
|---|
| 1032 |
} |
|---|
| 1033 |
|
|---|
| 1034 |
# Allow multiple values. |
|---|
| 1035 |
my @options = ref $config eq 'ARRAY' ? @$config : ( $config ); |
|---|
| 1036 |
|
|---|
| 1037 |
my %lookup = map { $_ => 1 } @options; |
|---|
| 1038 |
|
|---|
| 1039 |
for ( @values ) { |
|---|
| 1040 |
unless ( exists $lookup{ $_ } ) { |
|---|
| 1041 |
$self->errstr( $err_msg ); |
|---|
| 1042 |
return; |
|---|
| 1043 |
} |
|---|
| 1044 |
} |
|---|
| 1045 |
|
|---|
| 1046 |
return 1; |
|---|
| 1047 |
} |
|---|
| 1048 |
|
|---|
| 1049 |
|
|---|
| 1050 |
sub header { |
|---|
| 1051 |
my $self = shift; |
|---|
| 1052 |
return unless ref $self->{_headers} eq 'HASH'; |
|---|
| 1053 |
|
|---|
| 1054 |
return $self->{_headers}{$_[0]} || ''; |
|---|
| 1055 |
} |
|---|
| 1056 |
|
|---|
| 1057 |
|
|---|
| 1058 |
# return a ref to an array |
|---|
| 1059 |
sub results { |
|---|
| 1060 |
my $self = shift; |
|---|
| 1061 |
return $self->{_results} || undef; |
|---|
| 1062 |
} |
|---|
| 1063 |
|
|---|
| 1064 |
sub navigation { |
|---|
| 1065 |
my $self = shift; |
|---|
| 1066 |
return unless ref $self->{navigation} eq 'HASH'; |
|---|
| 1067 |
|
|---|
| 1068 |
return exists $self->{navigation}{$_[0]} ? $self->{navigation}{$_[0]} : ''; |
|---|
| 1069 |
} |
|---|
| 1070 |
|
|---|
| 1071 |
sub CGI { $_[0]->{q} }; |
|---|
| 1072 |
|
|---|
| 1073 |
|
|---|
| 1074 |
|
|---|
| 1075 |
|
|---|
| 1076 |
sub swish_command { |
|---|
| 1077 |
|
|---|
| 1078 |
my ($self, $param_name, $value ) = @_; |
|---|
| 1079 |
|
|---|
| 1080 |
return $self->{swish_command} || {} unless $param_name; |
|---|
| 1081 |
return $self->{swish_command}{$param_name} || '' unless $value; |
|---|
| 1082 |
|
|---|
| 1083 |
$self->{swish_command}{$param_name} = $value; |
|---|
| 1084 |
} |
|---|
| 1085 |
|
|---|
| 1086 |
# For use when forking |
|---|
| 1087 |
|
|---|
| 1088 |
sub swish_command_array { |
|---|
| 1089 |
|
|---|
| 1090 |
my ($self ) = @_; |
|---|
| 1091 |
|
|---|
| 1092 |
my @params; |
|---|
| 1093 |
my $swish_command = $self->swish_command; |
|---|
| 1094 |
|
|---|
| 1095 |
for ( keys %$swish_command ) { |
|---|
| 1096 |
|
|---|
| 1097 |
my $value = $swish_command->{$_}; |
|---|
| 1098 |
|
|---|
| 1099 |
if ( /^-/ ) { |
|---|
| 1100 |
push @params, $_; |
|---|
| 1101 |
push @params, ref $value eq 'ARRAY' ? @$value : $value; |
|---|
| 1102 |
next; |
|---|
| 1103 |
} |
|---|
| 1104 |
|
|---|
| 1105 |
# special cases |
|---|
| 1106 |
if ( $_ eq 'limits' ) { |
|---|
| 1107 |
push @params, '-L', $value->{prop}, $value->{low}, $value->{high}; |
|---|
| 1108 |
next; |
|---|
| 1109 |
} |
|---|
| 1110 |
|
|---|
| 1111 |
die "Unknown swish_command '$_' = '$value'"; |
|---|
| 1112 |
} |
|---|
| 1113 |
|
|---|
| 1114 |
return @params; |
|---|
| 1115 |
|
|---|
| 1116 |
} |
|---|
| 1117 |
|
|---|
| 1118 |
|
|---|
| 1119 |
|
|---|
| 1120 |
sub errstr { |
|---|
| 1121 |
my ($self, $value ) = @_; |
|---|
| 1122 |
|
|---|
| 1123 |
|
|---|
| 1124 |
$self->{_errstr} = $value if $value; |
|---|
| 1125 |
|
|---|
| 1126 |
return $self->{_errstr} || ''; |
|---|
| 1127 |
} |
|---|
| 1128 |
|
|---|
| 1129 |
|
|---|
| 1130 |
#============================================================================== |
|---|
| 1131 |
# Set query from the CGI parameters |
|---|
| 1132 |
#------------------------------------------------------------------------------ |
|---|
| 1133 |
|
|---|
| 1134 |
sub set_query { |
|---|
| 1135 |
my $self = shift; |
|---|
| 1136 |
my $q = $self->{q}; |
|---|
| 1137 |
|
|---|
| 1138 |
# Sets the query string, and any -L limits. |
|---|
| 1139 |
return unless $self->build_query; |
|---|
| 1140 |
|
|---|
| 1141 |
# Set the starting position (which is offset by one) |
|---|
| 1142 |
|
|---|
| 1143 |
my $start = $q->param('start') || 0; |
|---|
| 1144 |
$start = 0 unless $start =~ /^\d+$/ && $start >= 0; |
|---|
| 1145 |
|
|---|
| 1146 |
$self->swish_command( '-b', $start+1 ); |
|---|
| 1147 |
|
|---|
| 1148 |
|
|---|
| 1149 |
|
|---|
| 1150 |
# Set the max hits |
|---|
| 1151 |
|
|---|
| 1152 |
my $page_size = $self->config('page_size') || 15; |
|---|
| 1153 |
$self->swish_command( '-m', $page_size ); |
|---|
| 1154 |
|
|---|
| 1155 |
|
|---|
| 1156 |
return unless $self->set_index_file; |
|---|
| 1157 |
|
|---|
| 1158 |
|
|---|
| 1159 |
# Set the sort option, if any |
<
|---|