/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "udm_common.h"
#include "udm_env.h"
#include "udm_utils.h"
#include "udm_unicode.h"
#include "udm_unidata.h"
#include "udm_word.h"
#include "udm_synonym.h"
#include "udm_conf.h"


/*
 * Put a synonym list into its empty state by clearing every byte of
 * the structure.
 */
void UdmSynonymListInit(UDM_SYNONYMLIST * List){
     memset(List, 0, sizeof(*List));
}

/*
 * Read a synonyms file and append its word pairs to Env->Synonyms.
 *
 * File layout, as parsed below:
 *   - lines starting with '#', space, tab, CR or LF are skipped;
 *   - "Charset: <name>" selects the charset used to decode the words;
 *   - "Language: <name>" must appear before the first word line (the
 *     value is only checked for presence in this function);
 *   - any other line is split with UdmGetArgs() into up to 255 words;
 *     lines yielding fewer than two words are ignored.
 *
 * Every word of a line is converted with UdmConv() and lower-cased, and
 * every unordered pair (a, b) of the line is stored twice: a->b and b->a.
 *
 * Returns UDM_OK on success. On failure returns UDM_ERROR with a message
 * in Env->errstr; the file is closed and all temporary buffers are
 * released on every exit path. Pairs appended before the failure stay in
 * Env->Synonyms.
 */
__C_LINK int __UDMCALL UdmSynonymListLoad(UDM_ENV * Env,const char * filename){
     FILE      *f;
     char      str[512];
     char      lang[64]="";
     UDM_CHARSET    *cs=NULL;
     UDM_CHARSET    *sys_int=UdmGetCharSet("sys-int");
     UDM_CONV  file_uni;
     UDM_WIDEWORD    *ww = NULL;
     size_t    nuwords = 0;   /* ww[0..nuwords-1].uword are currently allocated */
     size_t    k;
     int       rc = UDM_OK;
     
     if(!(f=fopen(filename,"r"))){
          udm_snprintf(Env->errstr,sizeof(Env->errstr)-1,"Can't open synonyms file '%s'",filename);
          return UDM_ERROR;
     }
     while(fgets(str,sizeof(str),f)){
          if(str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n')continue;
          
          if(!strncmp(str,"Charset:",8)){
               char * lasttok;
               char * charset;
               if((charset = udm_strtok_r(str + 8, " \t\n\r", &lasttok))) {
                    cs=UdmGetCharSet(charset);
                    if(!cs){
                         udm_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'",
                                   charset, filename);
                         rc = UDM_ERROR;
                         goto done;
                    }
                    UdmConvInit(&file_uni,cs,sys_int,0);
               }
          }else
          if(!strncmp(str,"Language:",9)){
               char * lasttok;
               char * l;
               if((l = udm_strtok_r(str + 9, " \t\n\r", &lasttok))) {
                    /* lang starts zero-filled and at most sizeof-1 bytes are
                       copied, so the last byte always stays NUL. */
                    strncpy(lang,l,sizeof(lang)-1);
               }
          }else{
               char      *av[255];
               size_t         ac, i, j;
               UDM_WIDEWORD   *new_ww;

               if(!cs){
                    udm_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Charset command in synonyms file '%s'",filename);
                    rc = UDM_ERROR;
                    goto done;
               }
               if(!lang[0]){
                    udm_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Language command in synonyms file '%s'",filename);
                    rc = UDM_ERROR;
                    goto done;
               }

               ac = UdmGetArgs(str, av, 255);
               if (ac < 2) continue;

               /* Grow through a temporary so the old buffer is not lost
                  when realloc() fails. */
               new_ww = (UDM_WIDEWORD*)realloc(ww, ac * sizeof(UDM_WIDEWORD));
               if (new_ww == NULL) goto nomem;
               ww = new_ww;

               for (i = 0; i < ac; i++) {
                 /* word points into str; only uword is owned by ww[i]. */
                 ww[i].word = av[i];
                 ww[i].len = strlen(av[i]);
                 ww[i].uword = (int*)malloc((3 * ww[i].len + 1) * sizeof(int));
                 if (ww[i].uword == NULL) goto nomem;
                 nuwords = i + 1;
                 UdmConv(&file_uni, (char*)ww[i].uword, sizeof(int) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
                 UdmUniStrToLower(ww[i].uword);
               }

               for (i = 0; i < ac - 1; i++) {
                 for (j = i + 1; j < ac; j++) {

                   /* Two entries are appended per pair, so make sure two
                      free slots exist before writing. */
                   if((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms){
                    UDM_SYNONYM *grown = (UDM_SYNONYM*)realloc(Env->Synonyms.Synonym,
                                                   sizeof(UDM_SYNONYM)*(Env->Synonyms.msynonyms + 64));
                    if (grown == NULL) goto nomem;
                    Env->Synonyms.Synonym = grown;
                    Env->Synonyms.msynonyms += 64;
                   }
               
                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(UDM_SYNONYM));
               
                   /* Add direct order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = UdmUniDup(ww[i].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = UdmUniDup(ww[j].uword);
                   Env->Synonyms.nsynonyms++;
               
                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(UDM_SYNONYM));
               
                   /* Add reverse order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = UdmUniDup(ww[j].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = UdmUniDup(ww[i].uword);
                   Env->Synonyms.nsynonyms++;
                 }
               }

               for (i = 0; i < ac; i++) {
                 UDM_FREE(ww[i].uword);
               }
               nuwords = 0;
               
          }
     }
     goto done;

nomem:
     udm_snprintf(Env->errstr,sizeof(Env->errstr)-1,"Can't allocate memory while loading synonyms file '%s'",filename);
     rc = UDM_ERROR;

done:
     /* Release whatever per-line buffers are still live (non-zero only
        when a line was abandoned half-way). */
     for (k = 0; k < nuwords; k++) {
          UDM_FREE(ww[k].uword);
     }
     UDM_FREE(ww);
     fclose(f);
     return rc;
}

/*
 * Release everything owned by a synonym list: the word and uword buffers
 * of both halves (p and s) of each entry, then the entry array itself.
 *
 * Afterwards the structure is cleared to the same all-zero state that
 * UdmSynonymListInit() produces, so the entry count no longer refers to
 * released storage and the list can be freed again, searched or reloaded
 * safely.
 */
void UdmSynonymListFree(UDM_SYNONYMLIST * List){
     size_t i;
     
     for(i=0;i<List->nsynonyms;i++){
          UDM_FREE(List->Synonym[i].p.word);
          UDM_FREE(List->Synonym[i].p.uword);
          UDM_FREE(List->Synonym[i].s.word);
          UDM_FREE(List->Synonym[i].s.uword);
     }
     UDM_FREE(List->Synonym);

     /* Reset counters together with the pointer; leaving nsynonyms stale
        would make later loops index a released array. */
     bzero((void*)List, sizeof(*List));
}

/*
 * Ordering callback for sorting and searching UDM_SYNONYM arrays:
 * compares the primary (p) Unicode words of the two entries with
 * UdmUniStrCmp() and returns its result.
 */
static int cmpsyn(const void * v1,const void * v2){
     return UdmUniStrCmp(((const UDM_SYNONYM*)v1)->p.uword,
                         ((const UDM_SYNONYM*)v2)->p.uword);
}

/*
 * Sort the list's entries by primary word (see cmpsyn) using UdmSort().
 * An empty list is left untouched.
 */
__C_LINK void __UDMCALL UdmSynonymListSort(UDM_SYNONYMLIST * List){
     if(List->nsynonyms == 0){
          return;
     }
     UdmSort(List->Synonym, List->nsynonyms, sizeof(UDM_SYNONYM), &cmpsyn);
}


UDM_WIDEWORDLIST * UdmSynonymListFind(const UDM_SYNONYMLIST * List,UDM_WIDEWORD * wword){
     UDM_SYNONYM syn,*res,*first,*last;
     UDM_WIDEWORDLIST * Res=NULL;

     if(!List->nsynonyms)return NULL;

     syn.p.uword=wword->uword;

     res=bsearch(&syn,List->Synonym,List->nsynonyms,sizeof(UDM_SYNONYM),&cmpsyn);

     if(res){
          size_t nnorm,i;

          Res=(UDM_WIDEWORDLIST *)malloc(sizeof(*Res));
          UdmWideWordListInit(Res);

          /* Find first and last synonym */
          for(first=res;first>List->Synonym;first--){
               if(UdmUniStrCmp(wword->uword,first->p.uword)){
                    break;
               }else{
                    first->s.order=wword->order;
                    first->s.origin = UDM_WORD_ORIGIN_SYNONYM;
                    UdmWideWordListAdd(Res,&first->s);
               }
          }
          for(last=res+1;last<List->Synonym+List->nsynonyms;last++){
               if(UdmUniStrCmp(wword->uword,last->p.uword)){
                    break;
               }else{
                    last->s.order=wword->order;
                    last->s.origin = UDM_WORD_ORIGIN_SYNONYM;
                    UdmWideWordListAdd(Res,&last->s);
               }
          }

          /* Now find each of them in reverse order */
          nnorm=Res->nwords;
          for(i=0;i<nnorm;i++){

               syn.p.uword=Res->Word[i].uword;
               res=bsearch(&syn,List->Synonym,List->nsynonyms,sizeof(UDM_SYNONYM),&cmpsyn);
               
               if(res){
                    /* Find first and last synonym */
                    for(first=res;first>List->Synonym;first--){
                         if(UdmUniStrCmp(syn.p.uword,first->p.uword)){
                              break;
                         }else{
                              first->s.order=wword->order;
                              first->s.origin = UDM_WORD_ORIGIN_SYNONYM;
                              UdmWideWordListAdd(Res,&first->s);
                         }
                    }
                    for(last=res+1;last<List->Synonym+List->nsynonyms;last++){
                         if(UdmUniStrCmp(syn.p.uword,last->p.uword)){
                              break;
                         }else{
                              last->s.order=wword->order;
                              last->s.origin = UDM_WORD_ORIGIN_SYNONYM;
                              UdmWideWordListAdd(Res,&last->s);
                         }
                    }
               }
          }
     }
     return(Res);
}
