|
Revision 2142, 1.1 kB
(checked in by karpet, 7 months ago)
|
add some tokenizer tests and (doh!) include tokenizer.c
|
| Line | |
|---|
| 1 |
#!/usr/bin/perl
use strict;
use warnings;
use Test::More tests => 12;
use SwishTestUtils;

# Set SWISH_DEBUG_TOKENIZER for the commands run below; every assertion in
# this script matches against the text that
# SwishTestUtils::run_get_stderr() returns for a swish_tokenize invocation.
$ENV{SWISH_DEBUG_TOKENIZER} = 1;

# NOTE(review): this literal looks mis-encoded in this copy (multi-byte text
# rendered as Latin characters, with a line break in the middle of it).
# It is reproduced unchanged here -- restore it from the original file
# before relying on the "tokenize chinese" case.
my $chinese_cmd = './swish_tokenize åžæåšè¿
éè·³äžæçç';

# One entry per swish_tokenize run:
#   label  - description for the ok() on the returned output
#   cmd    - command line handed to run_get_stderr()
#   checks - [ pattern, description ] pairs, each asserted with like()
# Plan: 5 ok() + 7 like() = 12 tests.
my @cases = (
    {   label  => 'tokenize foobar',
        cmd    => q{./swish_tokenize foobar},
        checks => [
            [ qr/t->len\s+= 6/,        'length 6' ],
            [ qr/t->value\s+= foobar/, 'value foobar' ],
        ],
    },
    {   label  => 'tokenize ++foo++',
        cmd    => q{./swish_tokenize ++foo++},
        checks => [
            [ qr/t->len\s+= 3/,     'length 3' ],
            [ qr/t->value\s+= foo/, 'value foo' ],
        ],
    },
    {   label  => 'tokenize chinese',
        cmd    => $chinese_cmd,
        checks => [ [ qr/parsed 1 tokens/, '1 token' ] ],
    },
    {   label => 'tokenize spanish',
        cmd =>
            q{./swish_tokenize 'el zorro marrón rápido saltó sobre el perro perezoso'},
        checks => [ [ qr/parsed 9 tokens/, '9 tokens' ] ],
    },
    {   label  => 'tokenize uri',
        cmd    => q{./swish_tokenize 'http://FOOBAR.COM/'},
        checks => [ [ qr/parsed 3 tokens/, '3 tokens' ] ],
    },
);

for my $case (@cases) {

    # The run itself is a test: it passes when the returned text is true.
    my $stderr = SwishTestUtils::run_get_stderr( $case->{cmd} );
    ok( $stderr, $case->{label} );

    #diag($stderr);

    for my $check ( @{ $case->{checks} } ) {
        my ( $pattern, $description ) = @{$check};
        like( $stderr, $pattern, $description );
    }
}
|---|