Changeset 1965

Show
Ignore:
Timestamp:
11/30/07 17:00:59 (6 months ago)
Author:
joshr
Message:

tabs replaced with spaces. We use ts=4 sw=4 in vi.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Swishetest/trunk/BuildIndex.pm

    r1963 r1965  
    77# returns a hash of info parsed from the indexing  
    88sub build_index_from_directory { 
    9        my ($input, $index, $config, $extra_options) = @_;  
    10        $config = "conf/basic-libxml2.conf" unless $config; 
    11        $extra_options = "" unless $extra_options;  
    12         my $output = `/usr/local/bin/swish-e -c $config -i '$input' -f '$index' -v 1 $extra_options`;    
    13        print STDERR "$0: Running '$output'\n" if $ENV{TEST_VERBOSE}; 
    14                # -v 1 is important, we use it to test the indexer  
    15        return parse_indexing_output( $output ); 
     9    my ($input, $index, $config, $extra_options) = @_;  
     10    $config = "conf/basic-libxml2.conf" unless $config; 
     11    $extra_options = "" unless $extra_options;  
     12    my $output = `/usr/local/bin/swish-e -c $config -i '$input' -f '$index' -v 1 $extra_options`;    
     13    print STDERR "$0: Running '$output'\n" if $ENV{TEST_VERBOSE}; 
     14        # -v 1 is important, we use it to test the indexer  
     15    return parse_indexing_output( $output ); 
    1616} 
    1717 
     
    1919# returns a hash of info parsed from the indexing  
    2020sub build_index_from_external_program { 
    21        my ($external_program, $index, $config, $extra_options) = @_;  
    22        $config = "conf/basic-libxml2.conf" unless $config; 
    23        $extra_options = "" unless $extra_options;  
    24        my $cmd = "$external_program | /usr/local/bin/swish-e -c $config -i stdin -f '$index' -v 1 -S prog $extra_options"; 
    25                # -v 1 is important, we use it to test the indexer  
    26        print STDERR "$0: Running '$cmd'\n" if $ENV{TEST_VERBOSE}; 
    27         my $output = `$cmd`;     
    28        return parse_indexing_output( $output ); 
     21    my ($external_program, $index, $config, $extra_options) = @_;  
     22    $config = "conf/basic-libxml2.conf" unless $config; 
     23    $extra_options = "" unless $extra_options;  
     24    my $cmd = "$external_program | /usr/local/bin/swish-e -c $config -i stdin -f '$index' -v 1 -S prog $extra_options"; 
     25        # -v 1 is important, we use it to test the indexer  
     26    print STDERR "$0: Running '$cmd'\n" if $ENV{TEST_VERBOSE}; 
     27    my $output = `$cmd`;     
     28    return parse_indexing_output( $output ); 
    2929} 
    3030 
     
    3232# returns a  hash of name->value pairs gleaned from the swish-e output 
    3333sub parse_indexing_output { 
    34        my $output = shift; 
    35        my @output = split(/\r|\n/, $output);   # both \n's and \r's are in $output. 
    36                # yup, @output and $output.  
    37        my %out;        # the hash of index output data that we'll return 
    38        my $numreg = '([0-9]+)'; 
    39        for(@output) { 
    40                 chomp();         
    41                s/,//g;         # remove all commas, they made parsing harder. 
     34    my $output = shift; 
     35    my @output = split(/\r|\n/, $output);   # both \n's and \r's are in $output. 
     36        # yup, @output and $output.  
     37    my %out;    # the hash of index output data that we'll return 
     38    my $numreg = '([0-9]+)'; 
     39    for(@output) { 
     40        chomp();     
     41        s/,//g;     # remove all commas, they made parsing harder. 
    4242 
    43                 print "PROCESSING: $_\n" if defined($ENV{TEST_VERBOSE}) && $ENV{TEST_VERBOSE} > 1;       
     43        print "PROCESSING: $_\n" if defined($ENV{TEST_VERBOSE}) && $ENV{TEST_VERBOSE} > 1;   
    4444 
    45                $out{unique} = $1               if /^\s*($numreg)\s+unique\s+words?\s+indexed/; 
    46                $out{properties} = $1   if /^\s*($numreg)\s+properties/; 
    47                $out{files} = $1        if /^\s*($numreg)\s+files?\s+indexed/; 
    48                $out{bytes} = $1        if  /\s($numreg)\s+total\s+byte/; 
    49                $out{words} = $1        if  /\s($numreg)\s+total\s+word/; 
    50        
    51        die "Couldn't get data from swish-e index build, got " .  
     45        $out{unique} = $1       if /^\s*($numreg)\s+unique\s+words?\s+indexed/; 
     46        $out{properties} = $1   if /^\s*($numreg)\s+properties/; 
     47        $out{files} = $1        if /^\s*($numreg)\s+files?\s+indexed/; 
     48        $out{bytes} = $1        if  /\s($numreg)\s+total\s+byte/; 
     49        $out{words} = $1        if  /\s($numreg)\s+total\s+word/; 
     50   
     51    die "Couldn't get data from swish-e index build, got " .  
    5252        join(", ", map { "$_ = {$out{$_}}" } keys(%out)) . "\n(output was " . join("\n", @output) . ")"  
    53                        unless (scalar(keys(%out)) == 5); 
    54        return %out; 
     53            unless (scalar(keys(%out)) == 5); 
     54    return %out; 
    5555} 
    5656 
  • Swishetest/trunk/Changes

    r1961 r1965  
    55    - updated copyright dates through 2007 
    660.04  Thu Nov 29 16:30:00 2007 
    7        - there are 5 properties now, not 4 
    8        - use Carp to explain warnings and errors 
     7    - there are 5 properties now, not 4 
     8    - use Carp to explain warnings and errors 
    99    - make each .pm file an actual module. 
    10        - comment updates 
     10    - comment updates 
    1111    - package up for Peter Karman 
    12120.03  Tue Apr 12 20:08:00 2005 
    13        - major rewrite, code made more C-like.   
    14        - Test XML, Text and HTML parsers on complete FC1 and OSX dictionaries (t/030*) 
    15        - t/030* respects MAX_INDEX_FILES env variable for max index size 
     13    - major rewrite, code made more C-like.   
     14    - Test XML, Text and HTML parsers on complete FC1 and OSX dictionaries (t/030*) 
     15    - t/030* respects MAX_INDEX_FILES env variable for max index size 
    16160.02  Tue Dec 21 10:10:00 2004 
    17        - started structure with dirs qw(conf data index) 
    18        - wrote collection data/C001-trivial-txt/(a|b|c).txt 
    19        - wrote collection data/C002-trivial-html/(a|b|c).html 
    20        - wrote collection data/C003-trivial-xml/(a|b|c).xml 
    21        - copied OS X's /usr/share/dict/words to data/C020-words-txt/words.txt 
    22        - setup ./mkmanifest script to update manifest based on . and t/*.t 
    23        - wrote Swishtest.pm ::do_search()  and ::build_index(), and docs 
    24        - wrote 000-basicload.t, 001-SWISHAPI.t,  
    25        - wrote t/010*.t and t/010*.t, which test building indices from data/C010-trivial-txt/* 
    26        - wrote t/011-C011-trivial-html.t, t/012-C012-trivial-xml.t, &  
    27          t/020-C020-wordsbasic-txt.t, each for its respective C0##  
    28        - wrote make_collection (see ./make_collection -h) 
    29        - wrote t/030-C030-extprog-xml.t , which uses data/C030- (from make_collection) 
    30        - wrote t/040-C040-swishedocs-html.t which indexes swish-e docs at data/C040-... 
     17    - started structure with dirs qw(conf data index) 
     18    - wrote collection data/C001-trivial-txt/(a|b|c).txt 
     19    - wrote collection data/C002-trivial-html/(a|b|c).html 
     20    - wrote collection data/C003-trivial-xml/(a|b|c).xml 
     21    - copied OS X's /usr/share/dict/words to data/C020-words-txt/words.txt 
     22    - setup ./mkmanifest script to update manifest based on . and t/*.t 
     23    - wrote Swishtest.pm ::do_search()  and ::build_index(), and docs 
     24    - wrote 000-basicload.t, 001-SWISHAPI.t,  
     25    - wrote t/010*.t and t/010*.t, which test building indices from data/C010-trivial-txt/* 
     26    - wrote t/011-C011-trivial-html.t, t/012-C012-trivial-xml.t, &  
     27      t/020-C020-wordsbasic-txt.t, each for its respective C0##  
     28    - wrote make_collection (see ./make_collection -h) 
     29    - wrote t/030-C030-extprog-xml.t , which uses data/C030- (from make_collection) 
     30    - wrote t/040-C040-swishedocs-html.t which indexes swish-e docs at data/C040-... 
    31310.01  Mon Dec 20 12:42:02 2004 
    32        - original version; created by h2xs 1.22 with options 
    33                -n Swishetest -v 0.01 -XA 
     32    - original version; created by h2xs 1.22 with options 
     33        -n Swishetest -v 0.01 -XA 
    3434 
  • Swishetest/trunk/DoSearch.pm

    r1959 r1965  
    55use warnings; 
    66 
    7 my %swishes = ();      # map of filename -> SWISH::API 
     7my %swishes = ();   # map of filename -> SWISH::API 
    88 
    99# given an index filename and a query, opens the index if needed,  
     
    1212 
    1313sub open_index {  
    14        my $index = shift; 
    15        if (!exists($swishes{$index})) { 
    16                my $swish = $swishes{$index} = SWISH::API->new( $index ); 
    17                die "$0: index $index could not be opened.\n" unless $swish; 
    18                print STDERR "Index $index opened\n" if $ENV{TEST_VERBOSE}; 
    19        
    20        return 1; 
     14    my $index = shift; 
     15    if (!exists($swishes{$index})) { 
     16        my $swish = $swishes{$index} = SWISH::API->new( $index ); 
     17        die "$0: index $index could not be opened.\n" unless $swish; 
     18        print STDERR "Index $index opened\n" if $ENV{TEST_VERBOSE}; 
     19   
     20    return 1; 
    2121} 
    2222 
    2323sub close_index { 
    24        my $index = shift; 
    25        if (exists($swishes{$index})) { 
    26                delete $swishes{$index};        # remove from the hash, should close it 
    27        } else {  
    28                die "$0: index $index was not open.\n"; 
    29        
     24    my $index = shift; 
     25    if (exists($swishes{$index})) { 
     26        delete $swishes{$index};    # remove from the hash, should close it 
     27    } else {  
     28        die "$0: index $index was not open.\n"; 
     29   
    3030} 
    3131sub do_search { 
    32         my ($index, $query) = @_;        
    33        my @r = (); 
    34        #return @r unless $query; 
    35        my $swish; 
    36        eval { 
    37                if (exists($swishes{$index})) { 
    38                        $swish = $swishes{$index}; 
    39                } else {  
    40                        #$swish = $swishes{$index} = SWISH::API->new( $index ); 
    41                        die "$0: index $index was not opened.\n"; 
    42                
    43                #print STDERR "Searching for $query\n" if $ENV{TEST_VERBOSE}; 
    44                my $results = $swish->Query( $query ); 
    45                my @props = map { $_->Name } ($swish->PropertyList( $index ) ); 
    46                if ($swish->Error()) { 
    47                        #print STDERR "$0: Error searching for $query: " . $swish->ErrorString(); 
    48                        return @r; 
    49                
     32    my ($index, $query) = @_;    
     33    my @r = (); 
     34    #return @r unless $query; 
     35    my $swish; 
     36    eval { 
     37        if (exists($swishes{$index})) { 
     38            $swish = $swishes{$index}; 
     39        } else {  
     40            #$swish = $swishes{$index} = SWISH::API->new( $index ); 
     41            die "$0: index $index was not opened.\n"; 
     42       
     43        #print STDERR "Searching for $query\n" if $ENV{TEST_VERBOSE}; 
     44        my $results = $swish->Query( $query ); 
     45        my @props = map { $_->Name } ($swish->PropertyList( $index ) ); 
     46        if ($swish->Error()) { 
     47            #print STDERR "$0: Error searching for $query: " . $swish->ErrorString(); 
     48            return @r; 
     49       
    5050 
    51                while ( my $result = $results->NextResult() ) { 
    52                        my %h; 
    53                        for my $p (@props) { $h{$p} = $result->Property($p); } 
    54                        push(@r, \%h); 
    55                
    56        };      # end eval{} 
    57        if ($@) { 
    58                my $str = "$0: test failed: $@"; 
    59                if ($swish && $swish->Error()) { 
    60                        $str .= " (" . $swish->ErrorString() . ")"; 
    61                
    62        
    63        return @r; 
     51        while ( my $result = $results->NextResult() ) { 
     52            my %h; 
     53            for my $p (@props) { $h{$p} = $result->Property($p); } 
     54            push(@r, \%h); 
     55       
     56    };  # end eval{} 
     57    if ($@) { 
     58        my $str = "$0: test failed: $@"; 
     59        if ($swish && $swish->Error()) { 
     60            $str .= " (" . $swish->ErrorString() . ")"; 
     61       
     62   
     63    return @r; 
    6464} 
    6565 
  • Swishetest/trunk/GetDictionaryWords.pm

    r1959 r1965  
    88# returns ref to array of the read words and ref to hash of word->count 
    99sub get_dictionary_words { 
    10        my $dict = shift; 
    11         my ($case_sensitive) = (shift || 0);             
    12        my ($max_words) = (shift || 0);         # 0 means don't limit 
    13        my @words; 
    14        my %word_count; 
    15        # Load words. Repeats are OK 
    16        print STDERR "Loading dictionary...\n" if $ENV{TEST_VERBOSE}; 
    17        open (my $fh, "<", $dict)|| die "$0: Couldn't open $dict: $!";  
    18        #for($num_words = 0; $words[$num_words] = <$fh>; ) {  
    19        while( defined( my $word = <$fh> ) && ($max_words == 0 || scalar(@words) < $max_words)) { 
    20                $word =~ s/[-',.<>]//g;         # strip stuff 
    21                chomp $word;                    # strip newline 
    22                $word =~ s/^\s+//; 
    23                $word =~ s/\s+$//; 
    24                 if($word =~ /^$/) {      
    25                        warn "Skipping empty non-word '$word'\n"; 
    26                        next; 
    27                }  
    28                push(@words, $word); 
    29                my ($counted_word) = ($case_sensitive ? $word : lc($word)); 
    30                $word_count{$counted_word}++; 
    31        
    32        close $fh || die "$0: Couldn't close $dict: $!"; 
    33        return (\@words, \%word_count); 
     10    my $dict = shift; 
     11    my ($case_sensitive) = (shift || 0);         
     12    my ($max_words) = (shift || 0);     # 0 means don't limit 
     13    my @words; 
     14    my %word_count; 
     15    # Load words. Repeats are OK 
     16    print STDERR "Loading dictionary...\n" if $ENV{TEST_VERBOSE}; 
     17    open (my $fh, "<", $dict)|| die "$0: Couldn't open $dict: $!";  
     18    #for($num_words = 0; $words[$num_words] = <$fh>; ) {  
     19    while( defined( my $word = <$fh> ) && ($max_words == 0 || scalar(@words) < $max_words)) { 
     20        $word =~ s/[-',.<>]//g;     # strip stuff 
     21        chomp $word;            # strip newline 
     22        $word =~ s/^\s+//; 
     23        $word =~ s/\s+$//; 
     24        if($word =~ /^$/) {  
     25            warn "Skipping empty non-word '$word'\n"; 
     26            next; 
     27        }  
     28        push(@words, $word); 
     29        my ($counted_word) = ($case_sensitive ? $word : lc($word)); 
     30        $word_count{$counted_word}++; 
     31   
     32    close $fh || die "$0: Couldn't close $dict: $!"; 
     33    return (\@words, \%word_count); 
    3434} 
    3535 
  • Swishetest/trunk/MinMax.pm

    r1959 r1965  
    44 
    55sub max { 
    6        my $max = shift; 
    7        for(@_) { $max = $_ if $_ > $max; } 
    8        return $max; 
     6    my $max = shift; 
     7    for(@_) { $max = $_ if $_ > $max; } 
     8    return $max; 
    99} 
    1010sub min { 
    11        my $min = shift; 
    12        for(@_) { $min = $_ if $_ < $min; } 
    13        return $min; 
     11    my $min = shift; 
     12    for(@_) { $min = $_ if $_ < $min; } 
     13    return $min; 
    1414} 
    15151; 
  • Swishetest/trunk/NotRand.pm

    r1959 r1965  
    1414use vars qw( $last ); 
    1515sub not_rand { 
    16        my $max = $_[0] || 1;   # if no value is passed, we return '0' or '1' 
    17        use integer;    # is it faster not to use integer? No, it's faster to USE int. 
    18        $last = 1 unless defined($last); 
    19         $last = ($last*21+1);    
    20                # from "Advanced Perl Programming", 4.4 Using Closures.  
    21                # We truncate to 30 bits to preclude system overflow and thereby be more portable 
    22                # that would be 'mod 2 ** 30' (1,073,741,824), which makes sense, 4.2G over 4 
    23        $last %= 1_073_741_824; # that's 2 to the 30th 
    24        #print "rand of $max is " . abs($last % $max) . "\n"; 
    25        return abs($last % $max);       # abs isn't needed 
     16    my $max = $_[0] || 1;   # if no value is passed, we return '0' or '1' 
     17    use integer;    # is it faster not to use integer? No, it's faster to USE int. 
     18    $last = 1 unless defined($last); 
     19    $last = ($last*21+1);    
     20        # from "Advanced Perl Programming", 4.4 Using Closures.  
     21        # We truncate to 30 bits to preclude system overflow and thereby be more portable 
     22        # that would be 'mod 2 ** 30' (1,073,741,824), which makes sense, 4.2G over 4 
     23    $last %= 1_073_741_824; # that's 2 to the 30th 
     24    #print "rand of $max is " . abs($last % $max) . "\n"; 
     25    return abs($last % $max);   # abs isn't needed 
    2626} 
    2727 
  • Swishetest/trunk/Swishetest.pm

    r1960 r1965  
    3333  use Swishetest qw(build_index do_search); 
    3434  use Data::Dumper qw(Dumper); 
    35          
     35     
    3636  my %info = build_index( "input/data", "out/myindex.index");  
    37        # 3rd & 4th params 'configfile' and 'extraoptions' are optional 
     37    # 3rd & 4th params 'configfile' and 'extraoptions' are optional 
    3838  print Dumper( \%info ); 
    3939  open_index("myindex.index"); 
    40   my @rows = do_search( "myindex.index", "this is the search" );        
    41                # returns a list of hashrefs 
     40  my @rows = do_search( "myindex.index", "this is the search" );     
     41        # returns a list of hashrefs 
    4242  close_index("myindex.index"); 
    4343  print Dumper( \@rows ); 
  • Swishetest/trunk/make_collection

    r1964 r1965  
    1919my $min_words_per_file=1; 
    2020my $max_words_per_file=1; 
    21 my $num_files=0;       # 0 means one file for each word in dictionary 
    22 my $num_words;         # should be scalar(@words) 
    23 my $base_dir = "";     # empty base_dir means be an -S prog external program 
    24 my $randommode = 1;     
    25        # in randommode, words are randomly chosen, otherwise words are sequential from the dict 
    26 my $englishify = 0;    # insert commas, periods, and caps? 
    27 my $filetype = "xml"; # type of file to create. can also be 'html' or 'txt' 
     21my $num_files=0;    # 0 means one file for each word in dictionary 
     22my $num_words;      # should be scalar(@words) 
     23my $base_dir = "";  # empty base_dir means be an -S prog external program 
     24my $randommode = 1;  
     25    # in randommode, words are randomly chosen, otherwise words are sequential from the dict 
     26my $englishify = 0; # insert commas, periods, and caps? 
     27my $filetype = "xml";   # type of file to create. can also be 'html' or 'txt' 
    2828my $verbose = 0; 
    2929my $progress = 1; 
     
    3131 
    3232sub Usage { 
    33        return "make_collection: [--dict=words.txt] [--base_dir=/your/location]\n" . 
    34               "[--min_words_per_file=#] [--max_words_per_file=#] [--num_files=#]\n" . 
    35                   "[--verbose] [--englishify] [--filetype=(txt|html|xml)] [--(no)randommode]\n" . 
    36               "   Makes a set of (possibly random) xml, html, or txt files based on a dict.\n"; 
     33    return "make_collection: [--dict=words.txt] [--base_dir=/your/location]\n" . 
     34           "[--min_words_per_file=#] [--max_words_per_file=#] [--num_files=#]\n" . 
     35           "[--verbose] [--englishify] [--filetype=(txt|html|xml)] [--(no)randommode]\n" . 
     36           "   Makes a set of (possibly random) xml, html, or txt files based on a dict.\n"; 
    3737} 
    3838 
     
    6767    my ($words, $word_counts) = GetDictionaryWords::get_dictionary_words( $dict ); 
    6868 
    69     if ($num_files == 0) {      
     69    if ($num_files == 0) {  
    7070        $num_files = scalar(@$words); 
    7171        print STDERR "$prog: set num_files to $num_files\n" if $verbose; 
     
    8282        } 
    8383        #if (($i+1) % 1000 == 0) { print STDERR "** working on file $i"; } 
    84         my $this_file_words = # choose how many words will be in the file 
     84        my $this_file_words =   # choose how many words will be in the file 
    8585            int( not_rand( $max_words_per_file - $min_words_per_file + 1 ) ) + $min_words_per_file; 
    8686        my $doc =""; 
    87         my $toCap = 1; # should we Capitalize the coming word? 
     87        my $toCap = 1;  # should we Capitalize the coming word? 
    8888        for(my $j = 0; $j < $this_file_words; $j++, $wordcounter++) 
    8989        { 
     
    9292 
    9393            if ($toCap) { $toadd = "\u$toadd"; $toCap = 0; } 
    94             if (!defined($toadd)) { next; }     
     94            if (!defined($toadd)) { next; }  
    9595            $doc .= $toadd; 
    9696            if ($englishify) { 
    97                 my $r = int(not_rand(10000));  # random number we use to plop in punctuation & line breaks 
     97                my $r = int(not_rand(10000));   # random number we use to plop in punctuation & line breaks 
    9898                if ($j == $this_file_words-1 || $r % 9 == 0) { $doc .= ". "; $toCap = 1; } 
    9999                elsif ($r % 7 == 0) { $doc .= ","; } 
     
    107107            $doc = simple_xmlify( $doc ); 
    108108        } elsif ($filetype =~ /^html$/i) { 
    109             $doc = simple_htmlify( extract_title($doc), $doc );        # title, content 
     109            $doc = simple_htmlify( extract_title($doc), $doc ); # title, content 
    110110        } else { 
    111111            $doc = simple_txtify( $doc ); 
     
    126126# one block of text in xml 
    127127sub simple_xmlify { 
    128        return qq{<?xml version="1.0" encoding="ISO-8859-1"?>\n<swishdefault>\n} .  
    129                $_[0] . "\n</swishdefault>\n\n";  
     128    return qq{<?xml version="1.0" encoding="ISO-8859-1"?>\n<swishdefault>\n} .  
     129        $_[0] . "\n</swishdefault>\n\n";  
    130130} 
    131131 
    132132# one block of text in txt 
    133133sub simple_txtify { 
    134        return "$_[0]\n"; 
     134    return "$_[0]\n"; 
    135135} 
    136136 
     
    138138# one block of text, with a title, in html 
    139139sub simple_htmlify { 
    140        my ($title, $content) = @_; 
    141        my $html = <<EOF; 
     140    my ($title, $content) = @_; 
     141    my $html = <<EOF; 
    142142<html> 
    143143<head> 
     
    153153             
    154154EOF 
    155        return $html; 
     155    return $html; 
    156156} 
    157157 
     
    172172# return a title up to N characters based on the first 10 words. 
    173173sub extract_title { 
    174        my $doc = shift; 
    175        my $maxtitlewords = 10; 
    176        my $maxtitlelen = 25; 
    177        my @w = split(' ', $doc, $maxtitlewords + 1); 
    178         my $title = "";  
    179        for(my $i=0; $i < scalar(@w) && length($title)+length($w[$i]) <= $maxtitlelen; $i++) { 
    180                $title .= "$w[$i] "; 
    181        
    182        chop($title);   # remove the ' ', sloppy and simple 
    183        return $title; 
     174    my $doc = shift; 
     175    my $maxtitlewords = 10; 
     176    my $maxtitlelen = 25; 
     177    my @w = split(' ', $doc, $maxtitlewords + 1); 
     178    my $title = "";  
     179    for(my $i=0; $i < scalar(@w) && length($title)+length($w[$i]) <= $maxtitlelen; $i++) { 
     180        $title .= "$w[$i] "; 
     181   
     182    chop($title);   # remove the ' ', sloppy and simple 
     183    return $title; 
    184184} 
    185185 
    186186# given an extension, choose a parser 
    187187sub choose_parser { 
    188        my $ext = $_[0]; 
    189        if ($ext =~ /^xml$/i) { 
    190                return "XML2"; 
    191        } elsif ($ext =~ /^html?$/i) { 
    192                return "HTML2";  
    193        }  
    194        return "TXT"; 
     188    my $ext = $_[0]; 
     189    if ($ext =~ /^xml$/i) { 
     190        return "XML2"; 
     191    } elsif ($ext =~ /^html?$/i) { 
     192        return "HTML2";  
     193    }  
     194    return "TXT"; 
    195195} 
    196196