root/libswish3/trunk/bindings/perl/lib/SWISH/3.pm

Revision 2233, 4.8 kB (checked in by karpet, 3 days ago)

export header read/write in Perl; redefine some macros

Line 
1 use strict;
2 use warnings;
3 use 5.008_003;
4
5 package SWISH::3;
6
7 our $VERSION = '0.01';
8
9 # set by libswish3 in swish.c but that happens after %ENV has been
10 # initialized at Perl compile time.
11 $ENV{SWISH3} = 1;
12
13 use Carp;
14 use Data::Dump;
15 use Devel::Peek;
16
17 use base qw( Exporter );
18
19 use constant SWISH_DOC_FIELDS =>
20     qw( mtime size encoding mime uri nwords ext parser );
21 use constant SWISH_TOKEN_FIELDS => qw( pos meta value context len );
22
23 # load the XS at runtime, since we need $VERSION
24 require XSLoader;
25 XSLoader::load( __PACKAGE__, $VERSION );
26
27 # init the memory counter as class method at start up
28 # and call debug in END block
29 SWISH::3->init;
30
31 END {
32     if ( SWISH::3->get_memcount ) {
33         warn " ***WARNING*** possible memory leak ***WARNING***\n";
34         SWISH::3->mem_debug();
35     }
36 }
37
38 # our symbol table is populated with newCONSTSUB in Constants.xs
39 # directly from libswish3.h, so we just grep the symbol table.
40 my @constants = ( grep {m/^SWISH_/} keys %SWISH::3:: );
41
42 our %EXPORT_TAGS = ( 'constants' => [@constants], );
43 our @EXPORT_OK = ( @{ $EXPORT_TAGS{'constants'} } );
44
45 # these numbers are assigned via enum in libswish.h
46 # and so are too tedious to parse via Makefile.PL
47 # since they are typically only added-to, not a big deal
48 # to maintain manually here.
49 use constant SWISH_DOC_FIELDS_MAP => {
50     mtime    => 5,
51     size     => 4,
52     encoding => 10,
53     mime     => 8,
54     uri      => 1,
55     nwords   => 7,
56     parser   => 9,
57     title    => 3,
58 };
59
60 # convenience accessors
61 *config   = \&get_config;
62 *analyzer = \&get_analyzer;
63 *regex    = \&get_regex;
64 *parser   = \&get_parser;
65
66 # alias debugging methods for all classes
67 *SWISH::3::Config::refcount   = \&refcount;
68 *SWISH::3::Analyzer::refcount = \&refcount;
69 *SWISH::3::WordList::refcount = \&refcount;
70 *SWISH::3::Word::refcount     = \&refcount;
71 *SWISH::3::Doc::refcount      = \&refcount;
72 *SWISH::3::Data::refcount     = \&refcount;
73
74 sub new {
75     my $class = shift;
76     my %arg   = @_;
77     my $self  = $class->_setup;
78
79     if ( $arg{config} ) {
80         $self->get_config->add( $arg{config} );
81     }
82
83     # override defaults
84     for my $param (qw( data_class parser_class config_class analyzer_class ))
85     {
86         my $method = 'set_' . $param;
87
88         if ( exists $arg{$param} ) {
89
90             #warn "$method";
91             $self->$method( $arg{$param} );
92         }
93     }
94
95     $arg{handler} ||= \&default_handler;
96
97     $self->set_handler( $arg{handler} );
98
99     # KS default regex -- should also match swish_tokenize() behaviour
100     $arg{regex} ||= qr/\w+(?:'\w+)*/;
101     $self->set_regex( $arg{regex} );
102
103     return $self;
104 }
105
106 sub parse {
107     my $self = shift;
108     my $what = shift
109         or croak "parse requires filehandle, scalar ref or file path";
110     if ( ref $what eq 'SCALAR' ) {
111         return $self->parse_buffer($what);
112     }
113     elsif ( ref $what ) {
114         return $self->parse_fh($what);
115     }
116     else {
117         return $self->parse_file($what);
118     }
119 }
120
121 sub dump {
122     my $self = shift;
123     if (@_) {
124         Dump(@_);
125         Data::Dump::dump(@_);
126     }
127     else {
128         Dump($self);
129         Data::Dump::dump($self);
130     }
131 }
132
133 sub default_handler {
134     my $data = shift;
135     unless ( $ENV{SWISH_DEBUG} ) {
136         warn "default handler called\n";
137         return;
138     }
139
140     select(STDERR);
141     print '~' x 80, "\n";
142
143     my $props     = $data->properties;
144     my $prop_hash = $data->config->get_properties;
145
146     print "Properties\n";
147     for my $p ( sort keys %$props ) {
148         print " key: $p\n";
149         my $prop_value = $props->{$p};
150         print " value: " . Data::Dump::dump($prop_value) . "\n";
151         my $prop = $prop_hash->get($p);
152         printf( "    <%s type='%s'>%s</%s>\n",
153             $prop->name, $prop->type, $data->property($p), $prop->name );
154     }
155
156     print "Doc\n";
157     for my $d (SWISH_DOC_FIELDS) {
158         printf( "%15s: %s\n", $d, $data->doc->$d );
159     }
160
161     print "WordList\n";
162     my $wordlist = $data->wordlist;
163     while ( my $swishword = $wordlist->next ) {
164         print '-' x 50, "\n";
165         for my $w (SWISH_TOKEN_FIELDS) {
166             printf( "%15s: %s\n", $w, $swishword->$w );
167         }
168     }
169 }
170
171 1;
172 __END__
173
174 =head1 NAME
175
176 SWISH::3 - Perl interface to libswish3
177
178 =head1 SYNOPSIS
179
180  use SWISH::3;
181  my $swish3 = SWISH::3->new(
182                 config      => 'path/to/config.xml',
183                 handler     => \&my_handler,
184                 regex       => qr/\w+(?:'\w+)*/,
185                 );
186  $swish3->parse( 'path/to/file.xml' )
187     or die "failed to parse file: " . $swish3->error;
188  
189  printf "libxml2 version %s\n", $swish3->xml2_version;
190  printf "libswish3 version %s\n", $swish3->version;
191  
192  
193 =head1 DESCRIPTION
194
195 SWISH::3 is a Perl interface to the libswish3 C library.
196
197
198 =head1 METHODS
199
200 =head2 xml2_version
201
202 Returns the libxml2 version used by libswish3.
203
204 =head2 version
205
206 Returns the libswish3 version.
207
208
209 =head1 SEE ALSO
210
211 L<http://swish-e.org/>
212
213 SWISH::Prog
214
215 =cut
Note: See TracBrowser for help on using the browser.