Changeset 1275

Show
Ignore:
Timestamp:
06/09/03 10:12:47 (5 years ago)
Author:
jmruiz
Message:

Use only one routine for snowball's stemmer (Stem_snowball) instead of
one per language (Stem_es, Stem_fr, etc.)

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/swish-e/src/index.c

    r1273 r1275  
    24802480                case FUZZY_STEMMING_EN1: 
    24812481                case FUZZY_STEMMING_EN2: 
    2482                     stem_return = indexf->header.fuzzy_data.fuzzy_routine(&swishword, &lenswishword,indexf->header.fuzzy_data.snowball); 
     2482                    stem_return = indexf->header.fuzzy_data.fuzzy_routine(&swishword, &lenswishword,indexf->header.fuzzy_data.snowball,indexf->header.fuzzy_data.lang_stem); 
    24832483#else 
    24842484                    stem_return = indexf->header.fuzzy_data.fuzzy_routine(&swishword, &lenswishword); 
  • trunk/swish-e/src/stemmer.c

    r1273 r1275  
    531531 
    532532#ifdef SNOWBALL 
    533 int     Stem(char **inword, int *lenword, struct SN_env  *snowball
     533int     Stem(char **inword, int *lenword, struct SN_env  *snowball, int (*lang_stem)(struct SN_env *)
    534534#else 
    535535int     Stem(char **inword, int *lenword) 
     
    618618    char            *name; 
    619619#ifdef SNOWBALL 
    620     int             (*routine) (char **, int *, struct SN_env  *); 
     620    int             (*routine) (char **, int *, struct SN_env  *, int (*lang_stem)(struct SN_env *)); 
    621621    struct SN_env  *(*init) (void); 
    622622    void           (*free) (struct SN_env *); 
     623    int            (*lang_stem)(struct SN_env *); 
    623624#else 
    624625    int             (*routine) (char **, int *); 
    625626    void           *dummy1; 
    626627    void           *dummy2; 
     628    int            *dummy3; 
    627629#endif 
    628630} 
     
    631633static FUZZY_OPTS fuzzy_opts[] = { 
    632634 
    633     { FUZZY_NONE, "None", NULL, NULL, NULL }, 
    634     { FUZZY_STEMMING_EN, "Stemming_en", Stem, NULL, NULL }, 
    635     { FUZZY_STEMMING_EN, "Stem", Stem, NULL, NULL }, 
    636     { FUZZY_SOUNDEX, "Soundex", NULL, NULL, NULL }, 
    637     { FUZZY_METAPHONE, "Metaphone", NULL, NULL, NULL }, 
    638     { FUZZY_DOUBLE_METAPHONE, "DoubleMetaphone", NULL, NULL, NULL
    639 #ifdef SNOWBALL 
    640     ,{ FUZZY_STEMMING_ES, "Stemming_es", Stem_es, spanish_create_env, spanish_close_env }, 
    641     { FUZZY_STEMMING_FR, "Stemming_fr", Stem_fr, french_create_env, french_close_env }, 
    642     { FUZZY_STEMMING_IT, "Stemming_it", Stem_it, italian_create_env, italian_close_env }, 
    643     { FUZZY_STEMMING_PT, "Stemming_pt", Stem_pt, portuguese_create_env, portuguese_close_env }, 
    644     { FUZZY_STEMMING_DE, "Stemming_de", Stem_de, german_create_env, german_close_env }, 
    645     { FUZZY_STEMMING_NL, "Stemming_nl", Stem_nl, dutch_create_env, dutch_close_env }, 
    646     { FUZZY_STEMMING_EN1, "Stemming_en1", Stem_en1, porter_create_env, porter_close_env }, 
    647     { FUZZY_STEMMING_EN2, "Stemming_en2", Stem_en2, english_create_env, english_close_env
     635    { FUZZY_NONE, "None", NULL, NULL, NULL, NULL }, 
     636    { FUZZY_STEMMING_EN, "Stemming_en", Stem, NULL, NULL, NULL }, 
     637    { FUZZY_STEMMING_EN, "Stem", Stem, NULL, NULL, NULL }, 
     638    { FUZZY_SOUNDEX, "Soundex", NULL, NULL, NULL, NULL }, 
     639    { FUZZY_METAPHONE, "Metaphone", NULL, NULL, NULL, NULL }, 
     640    { FUZZY_DOUBLE_METAPHONE, "DoubleMetaphone", NULL, NULL, NULL, NULL
     641#ifdef SNOWBALL 
     642    ,{ FUZZY_STEMMING_ES, "Stemming_es", Stem_snowball, spanish_create_env, spanish_close_env, spanish_stem }, 
     643    { FUZZY_STEMMING_FR, "Stemming_fr", Stem_snowball, french_create_env, french_close_env, french_stem }, 
     644    { FUZZY_STEMMING_IT, "Stemming_it", Stem_snowball, italian_create_env, italian_close_env, italian_stem }, 
     645    { FUZZY_STEMMING_PT, "Stemming_pt", Stem_snowball, portuguese_create_env, portuguese_close_env, portuguese_stem }, 
     646    { FUZZY_STEMMING_DE, "Stemming_de", Stem_snowball, german_create_env, german_close_env, german_stem }, 
     647    { FUZZY_STEMMING_NL, "Stemming_nl", Stem_snowball, dutch_create_env, dutch_close_env, dutch_stem }, 
     648    { FUZZY_STEMMING_EN1, "Stemming_en1", Stem_snowball, porter_create_env, porter_close_env, porter_stem }, 
     649    { FUZZY_STEMMING_EN2, "Stemming_en2", Stem_snowball, english_create_env, english_close_env, english_stem
    648650#endif 
    649651}; 
     
    659661            fi->fuzzy_routine = fuzzy_opts[i].routine; 
    660662#ifdef SNOWBALL 
     663            if(fuzzy_opts[i].lang_stem) 
     664                fi->lang_stem = fuzzy_opts[i].lang_stem; 
     665 
    661666            if(fuzzy_opts[i].init) 
    662667                fi->snowball = fuzzy_opts[i].init(); 
     
    684689            fi->fuzzy_routine = fuzzy_opts[i].routine; 
    685690#ifdef SNOWBALL 
     691            if(fuzzy_opts[i].lang_stem) 
     692                fi->lang_stem = fuzzy_opts[i].lang_stem; 
     693 
    686694            if(fuzzy_opts[i].init) 
    687695                fi->snowball = fuzzy_opts[i].init(); 
     
    712720                fuzzy_opts[i].free(fi->snowball); 
    713721 
     722            fi->lang_stem = NULL; 
    714723            fi->snowball = NULL; 
    715724#endif 
     
    720729    fi->fuzzy_routine = NULL; 
    721730#ifdef SNOWBALL 
     731    fi->lang_stem = NULL; 
    722732    fi->snowball = NULL; 
    723733#endif 
     
    751761 
    752762#ifdef SNOWBALL 
    753 /* 06/2003 Jose Ruiz - Interface to snowball's spanish stemmer */ 
    754 int     Stem_es(char **inword, int *lenword, struct SN_env *snowball
     763/* 06/2003 Jose Ruiz - Interface to snowball's stemmer */ 
     764int     Stem_snowball(char **inword, int *lenword, struct SN_env *snowball, int (*lang_stem)(struct SN_env *)
    755765{ 
    756766    int new_lenword; 
    757767 
    758768    SN_set_current(snowball,strlen(*inword),*inword); /* Set Word to Stem */ 
    759     spanish_stem(snowball); 
     769    lang_stem(snowball); 
    760770 
    761771    if((*lenword) < snowball->l) 
     
    769779} 
    770780 
    771 /* 06/2003 Jose Ruiz - Interface to snowball's french stemmer */ 
    772 int     Stem_fr(char **inword, int *lenword, struct SN_env *snowball) 
    773 
    774     int new_lenword; 
    775  
    776     SN_set_current(snowball,strlen(*inword),*inword); /* Set Word to Stem */ 
    777     french_stem(snowball); 
    778  
    779     if((*lenword) < snowball->l) 
    780     { 
    781         efree(*inword); 
    782         *inword = emalloc(snowball->l + 1); 
    783         *lenword = snowball->l; 
    784     } 
    785     memcpy(*inword, snowball->p, snowball->l); 
    786     (*inword)[snowball->l] = '\0'; 
    787 
    788  
    789 /* 06/2003 Jose Ruiz - Interface to snowball's italian stemmer */ 
    790 int     Stem_it(char **inword, int *lenword, struct SN_env *snowball) 
    791 
    792     int new_lenword; 
    793  
    794     SN_set_current(snowball,strlen(*inword),*inword); /* Set Word to Stem */ 
    795     italian_stem(snowball); 
    796  
    797     if((*lenword) < snowball->l) 
    798     { 
    799         efree(*inword); 
    800         *inword = emalloc(snowball->l + 1); 
    801         *lenword = snowball->l; 
    802     } 
    803     memcpy(*inword, snowball->p, snowball->l); 
    804     (*inword)[snowball->l] = '\0'; 
    805 
    806  
    807 /* 06/2003 Jose Ruiz - Interface to snowball's portuguese stemmer */ 
    808 int     Stem_pt(char **inword, int *lenword, struct SN_env *snowball) 
    809 
    810     int new_lenword; 
    811  
    812     SN_set_current(snowball,strlen(*inword),*inword); /* Set Word to Stem */ 
    813     portuguese_stem(snowball); 
    814  
    815     if((*lenword) < snowball->l) 
    816     { 
    817         efree(*inword); 
    818         *inword = emalloc(snowball->l + 1); 
    819         *lenword = snowball->l; 
    820     } 
    821     memcpy(*inword, snowball->p, snowball->l); 
    822     (*inword)[snowball->l] = '\0'; 
    823 
    824  
    825 /* 06/2003 Jose Ruiz - Interface to snowball's german stemmer */ 
    826 int     Stem_de(char **inword, int *lenword, struct SN_env *snowball) 
    827 
    828     int new_lenword; 
    829  
    830     SN_set_current(snowball,strlen(*inword),*inword); /* Set Word to Stem */ 
    831     german_stem(snowball); 
    832  
    833     if((*lenword) < snowball->l) 
    834     { 
    835         efree(*inword); 
    836         *inword = emalloc(snowball->l + 1); 
    837         *lenword = snowball->l; 
    838     } 
    839     memcpy(*inword, snowball->p, snowball->l); 
    840     (*inword)[snowball->l] = '\0'; 
    841 
    842  
    843 /* 06/2003 Jose Ruiz - Interface to snowball's dutch stemmer */ 
    844 int     Stem_nl(char **inword, int *lenword, struct SN_env *snowball) 
    845 
    846     int new_lenword; 
    847  
    848     SN_set_current(snowball,strlen(*inword),*inword); /* Set Word to Stem */ 
    849     dutch_stem(snowball); 
    850  
    851     if((*lenword) < snowball->l) 
    852     { 
    853         efree(*inword); 
    854         *inword = emalloc(snowball->l + 1); 
    855         *lenword = snowball->l; 
    856     } 
    857     memcpy(*inword, snowball->p, snowball->l); 
    858     (*inword)[snowball->l] = '\0'; 
    859 
    860  
    861 /* 06/2003 Jose Ruiz - Interface to snowball's english porter(1) stemmer */ 
    862 int     Stem_en1(char **inword, int *lenword, struct SN_env *snowball) 
    863 
    864     int new_lenword; 
    865  
    866     SN_set_current(snowball,strlen(*inword),*inword); /* Set Word to Stem */ 
    867     porter_stem(snowball); 
    868  
    869     if((*lenword) < snowball->l) 
    870     { 
    871         efree(*inword); 
    872         *inword = emalloc(snowball->l + 1); 
    873         *lenword = snowball->l; 
    874     } 
    875     memcpy(*inword, snowball->p, snowball->l); 
    876     (*inword)[snowball->l] = '\0'; 
    877 
    878  
    879 /* 06/2003 Jose Ruiz - Interface to snowball's english porter(2) stemmer */ 
    880 int     Stem_en2(char **inword, int *lenword, struct SN_env *snowball) 
    881 
    882     int new_lenword; 
    883  
    884     SN_set_current(snowball,strlen(*inword),*inword); /* Set Word to Stem */ 
    885     english_stem(snowball); 
    886  
    887     if((*lenword) < snowball->l) 
    888     { 
    889         efree(*inword); 
    890         *inword = emalloc(snowball->l + 1); 
    891         *lenword = snowball->l; 
    892     } 
    893     memcpy(*inword, snowball->p, snowball->l); 
    894     (*inword)[snowball->l] = '\0'; 
    895 
    896  
    897 #endif 
     781#endif 
  • trunk/swish-e/src/stemmer.h

    r1273 r1275  
    1313     
    1414#ifdef SNOWBALL 
    15 int Stem (char **, int *, struct SN_env *); 
    16 int Stem_es (char **, int *, struct SN_env *); 
    17 int Stem_fr (char **, int *, struct SN_env *); 
    18 int Stem_it (char **, int *, struct SN_env *); 
    19 int Stem_pt (char **, int *, struct SN_env *); 
    20 int Stem_de (char **, int *, struct SN_env *); 
    21 int Stem_nl (char **, int *, struct SN_env *); 
    22 int Stem_en1 (char **, int *, struct SN_env *); 
    23 int Stem_en2 (char **, int *, struct SN_env *); 
     15int Stem (char **, int *, struct SN_env *, int (*lang_stem)(struct SN_env *)); 
     16int Stem_snowball (char **, int *, struct SN_env *, int (*lang_stem)(struct SN_env *)); 
    2417#else 
    2518int Stem (char **, int *); 
  • trunk/swish-e/src/swish.h

    r1273 r1275  
    471471    FuzzyIndexType fuzzy_mode; 
    472472#ifdef SNOWBALL 
    473     int     (*fuzzy_routine) (char **, int *, void *); 
     473    int     (*fuzzy_routine) (char **, int *, void *, int (*lang_stem) (void *)); 
     474    int     (*lang_stem) (void *); 
    474475    struct SN_env *snowball; 
    475476#else 
  • trunk/swish-e/src/swish2.c

    r1264 r1275  
    330330    /* set return value only if stem returns OK */ 
    331331#ifdef SNOWBALL 
    332     if ( sw->indexlist->header.fuzzy_data.fuzzy_routine(&sw->stemmed_word, &sw->stemmed_word_len,sw->indexlist->header.fuzzy_data.snowball) == STEM_OK ) 
     332    if ( sw->indexlist->header.fuzzy_data.fuzzy_routine(&sw->stemmed_word, &sw->stemmed_word_len,sw->indexlist->header.fuzzy_data.snowball,sw->indexlist->header.fuzzy_data.lang_stem) == STEM_OK ) 
    333333#else 
    334334    if ( sw->indexlist->header.fuzzy_data.fuzzy_routine(&sw->stemmed_word, &sw->stemmed_word_len) == STEM_OK ) 
  • trunk/swish-e/src/swish_words.c

    r1273 r1275  
    286286            case FUZZY_STEMMING_EN1: 
    287287            case FUZZY_STEMMING_EN2: 
    288                 header->fuzzy_data.fuzzy_routine(&self->word, &self->lenword,header->fuzzy_data.snowball); 
     288                header->fuzzy_data.fuzzy_routine(&self->word, &self->lenword,header->fuzzy_data.snowball,header->fuzzy_data.lang_stem); 
    289289#else 
    290290                header->fuzzy_data.fuzzy_routine(&self->word, &self->lenword);