| 21 | | warn "base is $base\n"; |
|---|
| 22 | | my (%out) = BuildIndex::build_index_from_external_program( |
|---|
| 23 | | #"./make_collection -min_words=1000 -max_words=1000 -num_files=100", # this makes 920K of data, 2.33MB index, 476k propfile |
|---|
| 24 | | #"./make_collection -min_words=10000 -max_words=10000 -num_files=1000", # this makes 38M index, 40MB prop |
|---|
| 25 | | #"./make_collection -min_words=100000 -max_words=100000 -num_files=1000", # this makes 325MB index, 392MB props |
|---|
| 26 | | "./make_collection -min_words=100000 -max_words=100000 -num_files=10000", # this makes ~3.1gb+2.5gb or so |
|---|
| 27 | | "blib/index/$base.index", |
|---|
| 28 | | "", # default config |
|---|
| 29 | | "-e" # economy option |
|---|
| 30 | | ); |
|---|
| | 25 | #my $base = "T050-28400"; # test 050 |
|---|
| | 26 | unless (-e "blib/index/$base.index" ) { |
|---|
| | 27 | warn "base is $base\n"; |
|---|
| | 28 | my (%out) = BuildIndex::build_index_from_external_program( |
|---|
| 32 | | # the real test here is if you get an error indexing above :) |
|---|
| | 30 | #"./make_collection -min_words=1000 -max_words=1000 -num_files=100", |
|---|
| | 31 | # # this makes 920K of data, 2.33MB index, 476k propfile |
|---|
| | 32 | |
|---|
| | 33 | #"./make_collection -min_words=10000 -max_words=10000 -num_files=1000", |
|---|
| | 34 | # # this makes 38M index, 40MB prop |
|---|
| | 35 | |
|---|
| | 36 | #"./make_collection -min_words=100000 -max_words=100000 -num_files=1000", |
|---|
| | 37 | # # this makes 325MB index, 392MB props |
|---|
| | 38 | |
|---|
| | 39 | "./make_collection -min_words=100000 -max_words=100000 -num_files=10000", |
|---|
| | 40 | # this makes: 3.84G blib/index/T050-28400.index.prop |
|---|
| | 41 | # 3.16G blib/index/T050-28400.index |
|---|
| | 42 | |
|---|
| | 43 | "blib/index/$base.index", |
|---|
| | 44 | "", # default config |
|---|
| | 45 | "-e" # economy option |
|---|
| | 46 | ); |
|---|
| | 47 | |
|---|
| | 48 | # the first real test here is if you get an error indexing above :) |
|---|
| | 49 | |
|---|
| | 50 | #print Dumper( \%out ); |
|---|