Changeset 1970
- Timestamp:
- 12/03/07 14:30:26 (5 months ago)
- Files:
-
- Swishetest/trunk/t/010-C010-trivial-txt.t (modified) (1 diff)
- Swishetest/trunk/t/011-C011-trivial-html.t (modified) (1 diff)
- Swishetest/trunk/t/012-C012-trivial-xml.t (modified) (1 diff)
- Swishetest/trunk/t/020-C020-wordsbasic-txt.t (modified) (1 diff)
- Swishetest/trunk/t/030-C030-medsm-xml.t (modified) (3 diffs)
- Swishetest/trunk/t/050-C020-largeindex.t (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
Swishetest/trunk/t/010-C010-trivial-txt.t
r1959 r1970 13 13 14 14 BEGIN { 15 require Carp;16 $SIG{__WARN__} = sub { Carp::confess $_[0] };17 use File::Path qw(mkpath);18 mkpath( ["blib/index"], 0, 0755);19 my $base = "C010";20 my (%out) = BuildIndex::build_index_from_directory( "data/$base-trivial-txt", "blib/index/$base.index" );21 #print STDERR "out is " . Dumper( \%out );15 require Carp; 16 $SIG{__WARN__} = sub { Carp::confess $_[0] }; 17 use File::Path qw(mkpath); 18 mkpath( ["blib/index"], 0, 0755); 19 my $base = "C010"; 20 my (%out) = BuildIndex::build_index_from_directory( "data/$base-trivial-txt", "blib/index/$base.index" ); 21 #print STDERR "out is " . Dumper( \%out ); 22 22 23 cmp_ok( scalar((keys(%out))), '>', 2, "Indexing output: " . scalar(%out) );24 cmp_ok( $out{unique}, '==', 2, 'unique words indexed' );25 cmp_ok( $out{properties}, '==', 5, 'num properties' );26 cmp_ok( $out{files}, '==', 3, 'files indexed' );27 cmp_ok( $out{bytes}, '==', 24, 'bytes indexed' );28 cmp_ok( $out{words}, '==', 4, 'total words indexed' );29 23 cmp_ok( scalar((keys(%out))), '>', 2, "Indexing output: " . scalar(%out) ); 24 cmp_ok( $out{unique}, '==', 2, 'unique words indexed' ); 25 cmp_ok( $out{properties}, '==', 5, 'num properties' ); 26 cmp_ok( $out{files}, '==', 3, 'files indexed' ); 27 cmp_ok( $out{bytes}, '==', 24, 'bytes indexed' ); 28 cmp_ok( $out{words}, '==', 4, 'total words indexed' ); 29 30 30 DoSearch::open_index( "blib/index/$base.index" ); 31 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test");31 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test"); 32 32 DoSearch::close_index( "blib/index/$base.index" ); 33 cmp_ok(scalar(@rows), '==', 3, "num results from 'swishe OR test'")33 cmp_ok(scalar(@rows), '==', 3, "num results from 'swishe OR test'") 34 34 35 35 }; Swishetest/trunk/t/011-C011-trivial-html.t
r1959 r1970 12 12 13 13 BEGIN { 14 require Carp;15 $SIG{__WARN__} = sub { Carp::confess $_[0] };16 use File::Path qw(mkpath);17 mkpath( ["blib/index"], 0, 0755);18 my $base = "C011";19 my (%out) = BuildIndex::build_index_from_directory( "data/$base-trivial-html", "blib/index/$base.index" );14 require Carp; 15 $SIG{__WARN__} = sub { Carp::confess $_[0] }; 16 use File::Path qw(mkpath); 17 mkpath( ["blib/index"], 0, 0755); 18 my $base = "C011"; 19 my (%out) = BuildIndex::build_index_from_directory( "data/$base-trivial-html", "blib/index/$base.index" ); 20 20 21 cmp_ok( scalar(keys(%out)), '>', 2, "Indexing output" );22 cmp_ok( $out{unique}, '==', 2, 'unique words indexed' );23 cmp_ok( $out{properties}, '==', 5, 'num properties' );24 cmp_ok( $out{files}, '==', 4, 'files indexed' );25 cmp_ok( $out{bytes}, '==', 318, 'bytes indexed' );26 cmp_ok( $out{words}, '==', 8, 'total words indexed' );27 21 cmp_ok( scalar(keys(%out)), '>', 2, "Indexing output" ); 22 cmp_ok( $out{unique}, '==', 2, 'unique words indexed' ); 23 cmp_ok( $out{properties}, '==', 5, 'num properties' ); 24 cmp_ok( $out{files}, '==', 4, 'files indexed' ); 25 cmp_ok( $out{bytes}, '==', 318, 'bytes indexed' ); 26 cmp_ok( $out{words}, '==', 8, 'total words indexed' ); 27 28 28 DoSearch::open_index( "blib/index/$base.index" ); 29 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test");29 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test"); 30 30 DoSearch::close_index( "blib/index/$base.index" ); 31 cmp_ok(scalar(@rows), '==', 3, "num results from 'swishe OR test'")31 cmp_ok(scalar(@rows), '==', 3, "num results from 'swishe OR test'") 32 32 33 33 }; Swishetest/trunk/t/012-C012-trivial-xml.t
r1959 r1970 12 12 13 13 BEGIN { 14 require Carp;15 $SIG{__WARN__} = sub { Carp::confess $_[0] };16 use File::Path qw(mkpath);17 mkpath( ["blib/index"], 0, 0755);18 my $base = "C012";19 my (%out) = BuildIndex::build_index_from_directory( "data/$base-trivial-xml", "blib/index/$base.index" );14 require Carp; 15 $SIG{__WARN__} = sub { Carp::confess $_[0] }; 16 use File::Path qw(mkpath); 17 mkpath( ["blib/index"], 0, 0755); 18 my $base = "C012"; 19 my (%out) = BuildIndex::build_index_from_directory( "data/$base-trivial-xml", "blib/index/$base.index" ); 20 20 21 cmp_ok( scalar(keys(%out)), '>', 2, "Indexing output" );22 cmp_ok( $out{unique}, '==', 2, 'unique words indexed' );23 cmp_ok( $out{properties}, '==', 5, 'num properties' );24 cmp_ok( $out{files}, '==', 4, 'files indexed' );25 cmp_ok( $out{bytes}, '==', 301, 'bytes indexed' );26 cmp_ok( $out{words}, '==', 8, 'total words indexed' );27 21 cmp_ok( scalar(keys(%out)), '>', 2, "Indexing output" ); 22 cmp_ok( $out{unique}, '==', 2, 'unique words indexed' ); 23 cmp_ok( $out{properties}, '==', 5, 'num properties' ); 24 cmp_ok( $out{files}, '==', 4, 'files indexed' ); 25 cmp_ok( $out{bytes}, '==', 301, 'bytes indexed' ); 26 cmp_ok( $out{words}, '==', 8, 'total words indexed' ); 27 28 28 DoSearch::open_index( "blib/index/$base.index" ); 29 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test");29 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test"); 30 30 DoSearch::close_index( "blib/index/$base.index" ); 31 cmp_ok(scalar(@rows), '==', 3, "num results from 'swishe OR test'")31 cmp_ok(scalar(@rows), '==', 3, "num results from 'swishe OR test'") 32 32 33 33 }; Swishetest/trunk/t/020-C020-wordsbasic-txt.t
r1959 r1970 12 12 13 13 BEGIN { 14 use File::Path qw(mkpath);15 mkpath( ["blib/index"], 0, 0755);16 my $base = "C020";17 my (%out) = BuildIndex::build_index_from_directory( "data/$base-words-txt", "blib/index/$base.index" );14 use File::Path qw(mkpath); 15 mkpath( ["blib/index"], 0, 0755); 16 my $base = "C020"; 17 my (%out) = BuildIndex::build_index_from_directory( "data/$base-words-txt", "blib/index/$base.index" ); 18 18 19 cmp_ok( scalar(keys(%out)), '>', 2, "Indexing output" );20 cmp_ok( $out{unique}, '==', 252983, 'unique words indexed' );21 cmp_ok( $out{properties}, '==', 5, 'num properties' );22 cmp_ok( $out{files}, '==', 2, 'files indexed' );23 cmp_ok( $out{bytes}, '==', 2896130, 'bytes indexed' );24 cmp_ok( $out{words}, '==', 280381, 'total words indexed' );19 cmp_ok( scalar(keys(%out)), '>', 2, "Indexing output" ); 20 cmp_ok( $out{unique}, '==', 252983, 'unique words indexed' ); 21 cmp_ok( $out{properties}, '==', 5, 'num properties' ); 22 cmp_ok( $out{files}, '==', 2, 'files indexed' ); 23 cmp_ok( $out{bytes}, '==', 2896130, 'bytes indexed' ); 24 cmp_ok( $out{words}, '==', 280381, 'total words indexed' ); 25 25 26 26 DoSearch::open_index( "blib/index/$base.index" ); 27 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test");27 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test"); 28 28 DoSearch::close_index( "blib/index/$base.index" ); 29 cmp_ok(scalar(@rows), '==', 2, "num results from 'swishe OR test'")29 cmp_ok(scalar(@rows), '==', 2, "num results from 'swishe OR test'") 30 30 }; 31 31 Swishetest/trunk/t/030-C030-medsm-xml.t
r1959 r1970 18 18 use MinMax; 19 19 use File::Path qw(mkpath); 20 use GetDictionaryWords;21 use Test::More qw(no_plan);22 my $max_words = MinMax::min(1_000_000, ($ENV{MAX_INDEX_FILES} || 1_000_000));23 # predict number of tests based on number of files in dictionaries and number of index types24 my @dicts = qw( data/C020-words-txt/words-linux-fc1.txt data/C020-words-txt/words-osx-10_3.txt);20 use GetDictionaryWords; 21 use Test::More qw(no_plan); 22 my $max_words = MinMax::min(1_000_000, ($ENV{MAX_INDEX_FILES} || 1_000_000)); 23 # predict number of tests based on number of files in dictionaries and number of index types 24 my @dicts = qw( data/C020-words-txt/words-linux-fc1.txt data/C020-words-txt/words-osx-10_3.txt); 25 25 my @filetypes = qw(html xml txt); 26 26 my $numdicts = scalar(@dicts); … … 28 28 # three tests plus one for each word, for each dictionary and filetype. 29 29 #plan tests => ($numdicts * $numfiletypes * (3 + $max_words )); 30 mkpath( ["blib/index"], 0, 0755);31 my $base = "C030";32 for my $dict (@dicts) {33 for my $filetype ( @filetypes ) {34 ( my $dictname = $dict ) =~ s/^.*-(([^.]|-)+)\.txt$/$1/;35 #( my $dictname = $dict ) =~ s/\.txt$//;# this doesn't work36 #$dictname =~ s![^/]*/!!;# this doesn't work30 mkpath( ["blib/index"], 0, 0755); 31 my $base = "C030"; 32 for my $dict (@dicts) { 33 for my $filetype ( @filetypes ) { 34 ( my $dictname = $dict ) =~ s/^.*-(([^.]|-)+)\.txt$/$1/; 35 #( my $dictname = $dict ) =~ s/\.txt$//; # this doesn't work 36 #$dictname =~ s![^/]*/!!; # this doesn't work 37 37 38 my $index = "blib/index/${base}_${dictname}_${filetype}.index";39 my ($words, $word_count) = GetDictionaryWords::get_dictionary_words( $dict, 0, $max_words);40 # this filename should come from somewhere factored41 die "Couldn't get words from $dict" unless @$words;42 43 # make a collection from dict, one word per document44 my $cmd = "./make_collection --dict=$dict --norand --noenglishify " .45 "--filetype=$filetype --min_words=1 --max_words=1 --num_files=" . scalar(@$words);46 print STDERR "Using $cmd\n" if $ENV{TEST_VERBOSE};38 my $index = "blib/index/${base}_${dictname}_${filetype}.index"; 39 my ($words, $word_count) = GetDictionaryWords::get_dictionary_words( $dict, 0, $max_words); 40 # this filename should come from somewhere factored 41 die "Couldn't get words from $dict" unless @$words; 42 43 # make a collection from dict, one word per document 44 my $cmd = "./make_collection --dict=$dict --norand --noenglishify " . 45 "--filetype=$filetype --min_words=1 --max_words=1 --num_files=" . scalar(@$words); 46 print STDERR "Using $cmd\n" if $ENV{TEST_VERBOSE}; 47 47 48 my (%out) = BuildIndex::build_index_from_external_program( $cmd, $index);48 my (%out) = BuildIndex::build_index_from_external_program( $cmd, $index); 49 49 50 # first three tests: did the indexing seem to work?51 cmp_ok( scalar(%out), '>', 0,"Indexing output" );52 cmp_ok( $out{files}, '==', scalar(@$words), 'files indexed' );53 cmp_ok( $out{properties}, '==', 5,'num properties' );50 # first three tests: did the indexing seem to work? 51 cmp_ok( scalar(%out), '>', 0, "Indexing output" ); 52 cmp_ok( $out{files}, '==', scalar(@$words), 'files indexed' ); 53 cmp_ok( $out{properties}, '==', 5, 'num properties' ); 54 54 55 55 DoSearch::open_index($index); 56 for my $word (@$words) {# then, one test for each word in the test57 my @rows = DoSearch::do_search($index, "'$word'"); # quote the word58 my ($num_expected_rows) = (# look up the count unless it's AND, OR, or NOT59 ($word =~ /^\s*(and|or|not|near)\s*$/i) ? 0 : ($word_count->{lc($word)} || 1));60 cmp_ok(scalar(@rows), "==", $num_expected_rows, "search '$word' ($filetype index from $dict)");61 }56 for my $word (@$words) { # then, one test for each word in the test 57 my @rows = DoSearch::do_search($index, "'$word'"); # quote the word 58 my ($num_expected_rows) = ( # look up the count unless it's AND, OR, or NOT 59 ($word =~ /^\s*(and|or|not|near)\s*$/i) ? 0 : ($word_count->{lc($word)} || 1)); 60 cmp_ok(scalar(@rows), "==", $num_expected_rows, "search '$word' ($filetype index from $dict)"); 61 } 62 62 DoSearch::close_index($index); 63 $words = undef;64 $word_count = undef;65 }66 }63 $words = undef; 64 $word_count = undef; 65 } 66 } 67 67 }; 68 68 … … 70 70 71 71 BEGIN { 72 use File::Path qw(mkpath);73 mkpath( ["blib/index"], 0, 0755);74 my $base = "C030";75 my (%out) = build_index(76 "data/C030-medsm-xml", "blib/index/$base.index");72 use File::Path qw(mkpath); 73 mkpath( ["blib/index"], 0, 0755); 74 my $base = "C030"; 75 my (%out) = build_index( 76 "data/C030-medsm-xml", "blib/index/$base.index"); 77 77 78 cmp_ok( scalar(%out), '>', 2, "Indexing output" );79 cmp_ok( $out{unique}, '==', 117468, 'unique words indexed' );80 cmp_ok( $out{properties}, '==', 4, 'num properties' );81 cmp_ok( $out{files}, '==', 1000, 'files indexed' );82 cmp_ok( $out{bytes}, '==', 16626260, 'bytes indexed' );83 cmp_ok( $out{words}, '==', 1513714, 'total words indexed' );78 cmp_ok( scalar(%out), '>', 2, "Indexing output" ); 79 cmp_ok( $out{unique}, '==', 117468, 'unique words indexed' ); 80 cmp_ok( $out{properties}, '==', 4, 'num properties' ); 81 cmp_ok( $out{files}, '==', 1000, 'files indexed' ); 82 cmp_ok( $out{bytes}, '==', 16626260, 'bytes indexed' ); 83 cmp_ok( $out{words}, '==', 1513714, 'total words indexed' ); 84 84 85 86 my @rows = do_search(87 "blib/index/$base.index", "swishe OR test");88 cmp_ok(scalar(@rows), '==', 14, "num results from 'swishe OR test'")85 86 my @rows = do_search( 87 "blib/index/$base.index", "swishe OR test"); 88 cmp_ok(scalar(@rows), '==', 14, "num results from 'swishe OR test'") 89 89 }; 90 90 Swishetest/trunk/t/050-C020-largeindex.t
r1968 r1970 13 13 BEGIN { 14 14 exit(0) unless $ENV{TEST_HUGE_INDEX}; 15 use File::Path qw(mkpath);16 mkpath( ["blib/index"], 0, 0755);17 my $base = "T050-$$"; # test 05015 use File::Path qw(mkpath); 16 mkpath( ["blib/index"], 0, 0755); 17 my $base = "T050-$$"; # test 050 18 18 warn "base is $base\n"; 19 my (%out) = BuildIndex::build_index_from_external_program(19 my (%out) = BuildIndex::build_index_from_external_program( 20 20 #"./make_collection -min_words=1000 -max_words=1000 -num_files=100", # this makes 920K of data, 2.33MB index, 476k propfile 21 21 #"./make_collection -min_words=10000 -max_words=10000 -num_files=1000", # this makes 38M index, 40MB prop … … 27 27 ); 28 28 29 cmp_ok( scalar(keys(%out)), '>', 2, "Indexing output" );30 cmp_ok( $out{unique}, '==', 252983, 'unique words indexed' );31 cmp_ok( $out{properties}, '==', 5, 'num properties' );32 cmp_ok( $out{files}, '==', 2, 'files indexed' );33 cmp_ok( $out{bytes}, '==', 2896130, 'bytes indexed' );34 cmp_ok( $out{words}, '==', 280381, 'total words indexed' );29 cmp_ok( scalar(keys(%out)), '>', 2, "Indexing output" ); 30 cmp_ok( $out{unique}, '==', 252983, 'unique words indexed' ); 31 cmp_ok( $out{properties}, '==', 5, 'num properties' ); 32 cmp_ok( $out{files}, '==', 2, 'files indexed' ); 33 cmp_ok( $out{bytes}, '==', 2896130, 'bytes indexed' ); 34 cmp_ok( $out{words}, '==', 280381, 'total words indexed' ); 35 35 36 36 DoSearch::open_index( "blib/index/$base.index" ); 37 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test");37 my @rows = DoSearch::do_search( "blib/index/$base.index", "swishe OR test"); 38 38 DoSearch::close_index( "blib/index/$base.index" ); 39 cmp_ok(scalar(@rows), '==', 2, "num results from 'swishe OR test'")39 cmp_ok(scalar(@rows), '==', 2, "num results from 'swishe OR test'") 40 40 }; 41 41
