root/libswish3/trunk/src/t/005-tokenizer.t

Revision 2142, 1.1 kB (checked in by karpet, 7 months ago)

add some tokenizer tests and (doh!) include tokenizer.c

Line 
#!/usr/bin/perl
#
# Tests for the swish_tokenize command-line program.
#
# Each case runs ./swish_tokenize on a single input string through
# SwishTestUtils::run_get_stderr() and matches the returned text against the
# token length, token value or token count that is expected for that input.
#
# There is no "use utf8" in this file, so the non-ASCII literals below are
# passed to the shell as the same bytes that are stored on disk.
# NOTE(review): presumably swish_tokenize expects UTF-8 input, so this file
# should be kept UTF-8 encoded -- confirm.
use strict;
use warnings;
use Test::More tests => 12;
use SwishTestUtils;

# Set before any command is run.
# NOTE(review): presumably this is what makes swish_tokenize print the
# "t->len", "t->value" and "parsed N tokens" lines matched below -- confirm
# against tokenizer.c.
$ENV{SWISH_DEBUG_TOKENIZER} = 1;

# Plain ASCII word: expect one 6-character token.
ok( my $buf = SwishTestUtils::run_get_stderr('./swish_tokenize foobar'),
    "tokenize foobar" );

#diag($buf);

like( $buf, qr/t->len\s+= 6/,        'length 6' );
like( $buf, qr/t->value\s+= foobar/, 'value foobar' );

# Word wrapped in '+' characters: the reported token is expected to be the
# bare 3-character word.
ok( $buf = SwishTestUtils::run_get_stderr('./swish_tokenize ++foo++'),
    "tokenize ++foo++" );

#diag($buf);

like( $buf, qr/t->len\s+= 3/,     'length 3' );
like( $buf, qr/t->value\s+= foo/, 'value foo' );

# Chinese phrase containing no whitespace: expected to come back as a single
# token.  The literal must stay on one line; a line break inside it would
# split the shell command in two.
ok( $buf = SwishTestUtils::run_get_stderr(
        './swish_tokenize 布朗在迅速跳下懒狐狗'),
    "tokenize chinese"
);

#diag($buf);

like( $buf, qr/parsed 1 tokens/, "1 token" );

# Spanish sentence with accented letters: nine whitespace-separated words,
# shell-quoted so that they reach the program as one argument.
ok( $buf = SwishTestUtils::run_get_stderr(
        "./swish_tokenize 'el zorro marrón rápido saltó sobre el perro perezoso'"
    ),
    "tokenize spanish"
);

#diag($buf);

like( $buf, qr/parsed 9 tokens/, "9 tokens" );

# Upper-case URI: expected to be split into three tokens.
ok( $buf = SwishTestUtils::run_get_stderr(
        "./swish_tokenize 'http://FOOBAR.COM/'"
    ),
    "tokenize uri"
);

#diag($buf);

like( $buf, qr/parsed 3 tokens/, "3 tokens" );
Note: See TracBrowser for help on using the browser.