| 1 |
use strict; |
|---|
| 2 |
use warnings; |
|---|
| 3 |
use 5.008_003; |
|---|
| 4 |
|
|---|
| 5 |
package SWISH::3; |
|---|
| 6 |
|
|---|
| 7 |
our $VERSION = '0.01'; |
|---|
| 8 |
|
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
$ENV{SWISH3} = 1; |
|---|
| 12 |
|
|---|
| 13 |
use Carp; |
|---|
| 14 |
use Data::Dump; |
|---|
| 15 |
use Devel::Peek; |
|---|
| 16 |
|
|---|
| 17 |
use base qw( Exporter ); |
|---|
| 18 |
|
|---|
| 19 |
use constant SWISH_DOC_FIELDS => |
|---|
| 20 |
qw( mtime size encoding mime uri nwords ext parser ); |
|---|
| 21 |
use constant SWISH_TOKEN_FIELDS => qw( pos meta value context len ); |
|---|
| 22 |
|
|---|
| 23 |
|
|---|
| 24 |
require XSLoader; |
|---|
| 25 |
XSLoader::load( __PACKAGE__, $VERSION ); |
|---|
| 26 |
|
|---|
| 27 |
|
|---|
| 28 |
|
|---|
| 29 |
SWISH::3->init; |
|---|
| 30 |
|
|---|
| 31 |
END { |
|---|
| 32 |
if ( SWISH::3->get_memcount ) { |
|---|
| 33 |
warn " ***WARNING*** possible memory leak ***WARNING***\n"; |
|---|
| 34 |
SWISH::3->mem_debug(); |
|---|
| 35 |
} |
|---|
| 36 |
} |
|---|
| 37 |
|
|---|
| 38 |
|
|---|
| 39 |
|
|---|
| 40 |
my @constants = ( grep {m/^SWISH_/} keys %SWISH::3:: ); |
|---|
| 41 |
|
|---|
| 42 |
our %EXPORT_TAGS = ( 'constants' => [@constants], ); |
|---|
| 43 |
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'constants'} } ); |
|---|
| 44 |
|
|---|
| 45 |
|
|---|
| 46 |
|
|---|
| 47 |
|
|---|
| 48 |
|
|---|
| 49 |
use constant SWISH_DOC_FIELDS_MAP => { |
|---|
| 50 |
mtime => 5, |
|---|
| 51 |
size => 4, |
|---|
| 52 |
encoding => 10, |
|---|
| 53 |
mime => 8, |
|---|
| 54 |
uri => 1, |
|---|
| 55 |
nwords => 7, |
|---|
| 56 |
parser => 9, |
|---|
| 57 |
title => 3, |
|---|
| 58 |
}; |
|---|
| 59 |
|
|---|
| 60 |
|
|---|
| 61 |
*config = \&get_config; |
|---|
| 62 |
*analyzer = \&get_analyzer; |
|---|
| 63 |
*regex = \&get_regex; |
|---|
| 64 |
*parser = \&get_parser; |
|---|
| 65 |
|
|---|
| 66 |
|
|---|
| 67 |
*SWISH::3::Config::refcount = \&refcount; |
|---|
| 68 |
*SWISH::3::Analyzer::refcount = \&refcount; |
|---|
| 69 |
*SWISH::3::WordList::refcount = \&refcount; |
|---|
| 70 |
*SWISH::3::Word::refcount = \&refcount; |
|---|
| 71 |
*SWISH::3::Doc::refcount = \&refcount; |
|---|
| 72 |
*SWISH::3::Data::refcount = \&refcount; |
|---|
| 73 |
|
|---|
| 74 |
sub new { |
|---|
| 75 |
my $class = shift; |
|---|
| 76 |
my %arg = @_; |
|---|
| 77 |
my $self = $class->_setup; |
|---|
| 78 |
|
|---|
| 79 |
if ( $arg{config} ) { |
|---|
| 80 |
$self->get_config->add( $arg{config} ); |
|---|
| 81 |
} |
|---|
| 82 |
|
|---|
| 83 |
|
|---|
| 84 |
for my $param (qw( data_class parser_class config_class analyzer_class )) |
|---|
| 85 |
{ |
|---|
| 86 |
my $method = 'set_' . $param; |
|---|
| 87 |
|
|---|
| 88 |
if ( exists $arg{$param} ) { |
|---|
| 89 |
|
|---|
| 90 |
|
|---|
| 91 |
$self->$method( $arg{$param} ); |
|---|
| 92 |
} |
|---|
| 93 |
} |
|---|
| 94 |
|
|---|
| 95 |
$arg{handler} ||= \&default_handler; |
|---|
| 96 |
|
|---|
| 97 |
$self->set_handler( $arg{handler} ); |
|---|
| 98 |
|
|---|
| 99 |
|
|---|
| 100 |
$arg{regex} ||= qr/\w+(?:'\w+)*/; |
|---|
| 101 |
$self->set_regex( $arg{regex} ); |
|---|
| 102 |
|
|---|
| 103 |
return $self; |
|---|
| 104 |
} |
|---|
| 105 |
|
|---|
| 106 |
sub parse { |
|---|
| 107 |
my $self = shift; |
|---|
| 108 |
my $what = shift |
|---|
| 109 |
or croak "parse requires filehandle, scalar ref or file path"; |
|---|
| 110 |
if ( ref $what eq 'SCALAR' ) { |
|---|
| 111 |
return $self->parse_buffer($what); |
|---|
| 112 |
} |
|---|
| 113 |
elsif ( ref $what ) { |
|---|
| 114 |
return $self->parse_fh($what); |
|---|
| 115 |
} |
|---|
| 116 |
else { |
|---|
| 117 |
return $self->parse_file($what); |
|---|
| 118 |
} |
|---|
| 119 |
} |
|---|
| 120 |
|
|---|
| 121 |
sub dump { |
|---|
| 122 |
my $self = shift; |
|---|
| 123 |
if (@_) { |
|---|
| 124 |
Dump(@_); |
|---|
| 125 |
Data::Dump::dump(@_); |
|---|
| 126 |
} |
|---|
| 127 |
else { |
|---|
| 128 |
Dump($self); |
|---|
| 129 |
Data::Dump::dump($self); |
|---|
| 130 |
} |
|---|
| 131 |
} |
|---|
| 132 |
|
|---|
| 133 |
sub default_handler { |
|---|
| 134 |
my $data = shift; |
|---|
| 135 |
unless ( $ENV{SWISH_DEBUG} ) { |
|---|
| 136 |
warn "default handler called\n"; |
|---|
| 137 |
return; |
|---|
| 138 |
} |
|---|
| 139 |
|
|---|
| 140 |
select(STDERR); |
|---|
| 141 |
print '~' x 80, "\n"; |
|---|
| 142 |
|
|---|
| 143 |
my $props = $data->properties; |
|---|
| 144 |
my $prop_hash = $data->config->get_properties; |
|---|
| 145 |
|
|---|
| 146 |
print "Properties\n"; |
|---|
| 147 |
for my $p ( sort keys %$props ) { |
|---|
| 148 |
print " key: $p\n"; |
|---|
| 149 |
my $prop_value = $props->{$p}; |
|---|
| 150 |
print " value: " . Data::Dump::dump($prop_value) . "\n"; |
|---|
| 151 |
my $prop = $prop_hash->get($p); |
|---|
| 152 |
printf( " <%s type='%s'>%s</%s>\n", |
|---|
| 153 |
$prop->name, $prop->type, $data->property($p), $prop->name ); |
|---|
| 154 |
} |
|---|
| 155 |
|
|---|
| 156 |
print "Doc\n"; |
|---|
| 157 |
for my $d (SWISH_DOC_FIELDS) { |
|---|
| 158 |
printf( "%15s: %s\n", $d, $data->doc->$d ); |
|---|
| 159 |
} |
|---|
| 160 |
|
|---|
| 161 |
print "WordList\n"; |
|---|
| 162 |
my $wordlist = $data->wordlist; |
|---|
| 163 |
while ( my $swishword = $wordlist->next ) { |
|---|
| 164 |
print '-' x 50, "\n"; |
|---|
| 165 |
for my $w (SWISH_TOKEN_FIELDS) { |
|---|
| 166 |
printf( "%15s: %s\n", $w, $swishword->$w ); |
|---|
| 167 |
} |
|---|
| 168 |
} |
|---|
| 169 |
} |
|---|
| 170 |
|
|---|
| 171 |
1; |
|---|
| 172 |
__END__ |
|---|
| 173 |
|
|---|
| 174 |
=head1 NAME |
|---|
| 175 |
|
|---|
| 176 |
SWISH::3 - Perl interface to libswish3 |
|---|
| 177 |
|
|---|
| 178 |
=head1 SYNOPSIS |
|---|
| 179 |
|
|---|
| 180 |
use SWISH::3; |
|---|
| 181 |
my $swish3 = SWISH::3->new( |
|---|
| 182 |
config => 'path/to/config.xml', |
|---|
| 183 |
handler => \&my_handler, |
|---|
| 184 |
regex => qr/\w+(?:'\w+)*/, |
|---|
| 185 |
); |
|---|
| 186 |
$swish3->parse( 'path/to/file.xml' ) |
|---|
| 187 |
or die "failed to parse file: " . $swish3->error; |
|---|
| 188 |
|
|---|
| 189 |
printf "libxml2 version %s\n", $swish3->xml2_version; |
|---|
| 190 |
printf "libswish3 version %s\n", $swish3->version; |
|---|
| 191 |
|
|---|
| 192 |
|
|---|
| 193 |
=head1 DESCRIPTION |
|---|
| 194 |
|
|---|
| 195 |
SWISH::3 is a Perl interface to the libswish3 C library. |
|---|
| 196 |
|
|---|
| 197 |
|
|---|
| 198 |
=head1 METHODS |
|---|
| 199 |
|
|---|
| 200 |
=head2 xml2_version |
|---|
| 201 |
|
|---|
| 202 |
Returns the libxml2 version used by libswish3. |
|---|
| 203 |
|
|---|
| 204 |
=head2 version |
|---|
| 205 |
|
|---|
| 206 |
Returns the libswish3 version. |
|---|
| 207 |
|
|---|
| 208 |
|
|---|
| 209 |
=head1 SEE ALSO |
|---|
| 210 |
|
|---|
| 211 |
L<http://swish-e.org/> |
|---|
| 212 |
|
|---|
| 213 |
SWISH::Prog |
|---|
| 214 |
|
|---|
| 215 |
=cut |
|---|