/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_IO_H
#include <io.h>
#endif
#include <sys/stat.h>
#include <stdio.h>
#include <errno.h>
#include <math.h>

#include "udm_common.h"
#include "udm_utils.h"
#include "udm_unicode.h"
#include "udm_unidata.h"
#include "udm_uniconv.h"
#include "udm_searchtool.h"
#include "udm_boolean.h"
#include "udm_xmalloc.h"
#include "udm_spell.h"
#include "udm_stopwords.h"
#include "udm_word.h"
#include "udm_vars.h"
#include "udm_db.h"
#include "udm_db_int.h"
#include "udm_url.h"
#include "udm_hash.h"
#include "udm_parsehtml.h"
#include "udm_store.h"
#include "udm_doc.h"
#include "udm_conf.h"
#include "udm_result.h"
#include "udm_log.h"
#include "udm_sgml.h"
#include "udm_mutex.h"
#include "udm_chinese.h"

#ifdef CHASEN
#include <chasen.h>
#endif

#ifdef MECAB
#include <mecab.h>
#endif

/*
#define DEBUG_CACHE
*/

/* One query-word occurrence inside a document, as collected by UdmGroupByURL. */
typedef struct {
  size_t position;  /* word position, taken from UDM_WRDPOS() of the coordinate */
  size_t order;     /* "order" field of the matching entry in the query word list */
} UDM_PHR_DAT;


/********** QSORT functions *******************************/

/*
 * Comparator for sorting by weight: entries with a larger coord come first;
 * entries with equal coord are ordered by ascending url_id.
 */
static int cmpword(UDM_URL_CRD *s1,UDM_URL_CRD *s2){
	if (s1->coord != s2->coord)
		return (s1->coord > s2->coord) ? -1 : 1;
	if (s1->url_id != s2->url_id)
		return (s1->url_id > s2->url_id) ? 1 : -1;
	return 0;
}

/*
 * Comparator for sorting by document: ascending url_id, and for the same
 * url_id by the difference of the UDM_WRDPOS() parts of the coordinates.
 */
static int cmpurlid (UDM_URL_CRD *s1, UDM_URL_CRD *s2) {
	if (s1->url_id != s2->url_id)
		return (s1->url_id > s2->url_id) ? 1 : -1;
	return(UDM_WRDPOS(s1->coord) - UDM_WRDPOS(s2->coord));
}

/*
 * Compare the candidate entry (C, D) with entry j of list L.
 * Keys, in order: site_id, coord, pop_rank.  Returns 1 when the candidate
 * is greater on the first differing key, -1 when smaller, 0 when all equal.
 */
static int cmpsiteid(UDM_URLCRDLIST *L, UDM_URL_CRD *C, UDM_URLDATA *D, long j){
	UDM_URLDATA *ref_dat = &L->Data[j];
	UDM_URL_CRD *ref_crd = &L->Coords[j];

	if (D->site_id > ref_dat->site_id) return 1;
	else if (D->site_id < ref_dat->site_id) return -1;

	if (C->coord > ref_crd->coord) return 1;
	else if (C->coord < ref_crd->coord) return -1;

	if (D->pop_rank > ref_dat->pop_rank) return 1;
	else if (D->pop_rank < ref_dat->pop_rank) return -1;

	return 0;
}

/*
 * Compare the candidate entry (C, D) with entry j of list L using a sort
 * pattern.  Each pattern character selects one key:
 *   R/r - coord, P/p - pop_rank, D/d - last_mod_time.
 * An upper-case letter returns the natural comparison result (1 when the
 * candidate is greater), a lower-case letter returns the inverted result.
 * Unknown characters are ignored.  The first key that differs decides;
 * 0 is returned when every key compares equal.
 */
static int cmppattern(UDM_URLCRDLIST *L, UDM_URL_CRD *C, UDM_URLDATA *D, long j, const char *pattern) {
  while (*pattern != '\0') {
    const char key = *pattern++;
    int order = 0;    /* -1, 0 or +1: candidate relative to entry j */
    int natural = 0;  /* non-zero for the upper-case form of the key */

    if (key == 'R' || key == 'r') {
      order = (C->coord > L->Coords[j].coord) - (C->coord < L->Coords[j].coord);
      natural = (key == 'R');
    } else if (key == 'P' || key == 'p') {
      order = (D->pop_rank > L->Data[j].pop_rank) - (D->pop_rank < L->Data[j].pop_rank);
      natural = (key == 'P');
    } else if (key == 'D' || key == 'd') {
      order = (D->last_mod_time > L->Data[j].last_mod_time) - (D->last_mod_time < L->Data[j].last_mod_time);
      natural = (key == 'D');
    }

    if (order != 0) return natural ? order : -order;
  }
  return 0;
}

/* Comparator ordering UDM_PHR_DAT records by ascending position. */
static int cmpphr(UDM_PHR_DAT *p1, UDM_PHR_DAT *p2) {
  return (p1->position > p2->position) - (p1->position < p2->position);
}

/****************************************************/

/* Sort num entries of wrd with the cmpword ordering; a NULL array is ignored. */
void UdmSortSearchWordsByWeight(UDM_URL_CRD *wrd,size_t num){
	if (wrd == NULL) return;
	UdmSort((void*)wrd, num, sizeof(UDM_URL_CRD), (qsort_cmp)cmpword);
}

/* Sort num entries of wrd with the cmpurlid ordering; a NULL array is ignored. */
void UdmSortSearchWordsByURL(UDM_URL_CRD *wrd,size_t num){
	if (wrd == NULL) return;
	UdmSort((void*)wrd, num, sizeof(UDM_URL_CRD), (qsort_cmp)cmpurlid);
}

/* Increasing table of gap sizes (the "h" values) used by the gapped insertion
   sorts in UdmSortSearchWordsBySite and UdmSortSearchWordsByPattern; both
   index it from 13 downwards. */
static size_t UdmH[] = {1, 5, 19, 41, 109, 209, 505, 929, 2161, 3905, 8929, 16001, 36289, 64769};

/*
 * Sort the first num entries of L (the Coords and Data arrays in parallel)
 * with the cmpsiteid ordering.  The sort is a sequence of gapped insertion
 * passes whose gaps come from UdmH, largest suitable gap first.
 */
void UdmSortSearchWordsBySite(UDM_URLCRDLIST *L, size_t num){
	ssize_t gap, slot, cur;
	int level = 13;
	UDM_URL_CRD held_crd;
	UDM_URLDATA held_dat;

	/* Skip gaps larger than num / 3; the smallest gap is always used. */
	while ((level > 0) && ((num / 3) < UdmH[level])) level--;

	for (; level >= 0; level--) {
		gap = UdmH[level];
		for (cur = gap; cur < (ssize_t)num; cur++) {
			held_crd = L->Coords[cur];
			held_dat = L->Data[cur];

			/* Shift greater entries up by one gap until the slot is found. */
			for (slot = cur - gap; slot >= 0; slot -= gap) {
				if (cmpsiteid(L, &held_crd, &held_dat, slot) <= 0) break;
				L->Coords[slot + gap] = L->Coords[slot];
				L->Data[slot + gap] = L->Data[slot];
			}

			L->Coords[slot + gap] = held_crd;
			L->Data[slot + gap] = held_dat;
		}
	}
}


/*
 * Sort the first num entries of L (Coords and Data in parallel) with the
 * cmppattern ordering for the given pattern.  When Res->PerSite is set, its
 * entries are moved together with the list entries.  Same gapped insertion
 * scheme as UdmSortSearchWordsBySite.
 */
void UdmSortSearchWordsByPattern(UDM_RESULT *Res, UDM_URLCRDLIST *L, size_t num, const char *pattern) {
	ssize_t gap, slot, cur;
	int level = 13;
	UDM_URL_CRD held_crd;
	UDM_URLDATA held_dat;
	size_t held_cnt = 1;

	/* Skip gaps larger than num / 3; the smallest gap is always used. */
	while ((level > 0) && ((num / 3) < UdmH[level])) level--;

	for (; level >= 0; level--) {
		gap = UdmH[level];
		for (cur = gap; cur < (ssize_t)num; cur++) {
			held_crd = L->Coords[cur];
			held_dat = L->Data[cur];
			if (Res->PerSite) held_cnt = Res->PerSite[cur];

			/* Shift greater entries up by one gap until the slot is found. */
			for (slot = cur - gap; slot >= 0; slot -= gap) {
				if (cmppattern(L, &held_crd, &held_dat, slot, pattern) <= 0) break;
				L->Coords[slot + gap] = L->Coords[slot];
				L->Data[slot + gap] = L->Data[slot];
				if (Res->PerSite) Res->PerSite[slot + gap] = Res->PerSite[slot];
			}

			L->Coords[slot + gap] = held_crd;
			L->Data[slot + gap] = held_dat;
			if (Res->PerSite) Res->PerSite[slot + gap] = held_cnt;
		}
	}
}


/*#define DEBUG_TOP_SORT*/

/*
 * Find topcount best results.
 *
 * The first topcount+1 entries are sorted with UdmSortSearchWordsByWeight
 * (larger coord first, then smaller url_id).  Every later entry that ranks
 * ahead of wrd[topcount] is inserted into the sorted head at the position
 * found by binary search; the entry it displaces is stored in its place.
 *
 * NOTE(review): the initial sort touches topcount+1 elements, so callers
 * presumably guarantee topcount < nwrd - confirm.
 */
void UdmWrdTopSort(UDM_URL_CRD *wrd, size_t nwrd,size_t topcount){
	size_t j;
	UDM_URL_CRD w;
	
#ifdef DEBUG_TOP_SORT
	fprintf(stderr,"top+1=%d nwrd=%d\n",topcount+1,nwrd);
#endif
	
	UdmSortSearchWordsByWeight(wrd,topcount+1);
		for(j=topcount;j<nwrd;j++){
		register int res;

		/* res > 0 when wrd[j] ranks ahead of the current wrd[topcount] */
		if (wrd[j].coord > wrd[topcount].coord) res = 1;
		else if (wrd[j].coord < wrd[topcount].coord) res = -1;
		else
/*		if(!(res=(wrd[j].coord-wrd[topcount].coord)))*/
		  res = (wrd[topcount].url_id - wrd[j].url_id);
#ifdef DEBUG_TOP_SORT
fprintf(stderr,"(%d,%d) %d (%d,%d) %d\n",
	wrd[topcount].coord,wrd[topcount].url_id,topcount,
	wrd[j].coord,wrd[j].url_id,j);
#endif
		if(res>0){
			size_t l,c,r;
			
			/* binary search in [0, topcount) for the insertion slot r */
			l=0;r=topcount;
			while(l<r){
				c=(l+r)/2;
				/* res > 0 when wrd[c] ranks ahead of wrd[j] */
				if (wrd[c].coord > wrd[j].coord) res = 1;
				else if (wrd[c].coord < wrd[j].coord) res = -1;
				else
/*				if(!(res=(wrd[c].coord-wrd[j].coord)))*/
				  res = (wrd[j].url_id - wrd[c].url_id);
				if(res>0){
					l=c+1;
				}else{
					r=c;
				}
			}
			/* the shift below overwrites wrd[topcount], so keep a copy
			   and park it in slot j once wrd[j] has been inserted */
			w=wrd[topcount];
			memmove(&wrd[r+1],&wrd[r],(topcount-r)*sizeof(*wrd));
			wrd[r]=wrd[j];
			wrd[j]=w;
		}
	}
}

/*
 * Parse the user query (variable "q") into the boolean stack Res->items and
 * the query word list Res->WWList.
 *
 * Steps: convert the query from the browser charset into the internal
 * ("sys-int") representation, store a local-charset copy back into "q",
 * lower-case and segment the text, then walk its tokens.  Separator tokens
 * contribute operator items (& + | ~ ( ) and quotes); letter/CJK tokens
 * contribute a WORD item and, unless rejected as stop word / too short /
 * too long / duplicate, an entry in Res->WWList followed by the forms
 * returned by UdmAllForms().  An unbalanced phrase quote is closed at the
 * end.
 *
 * Returns 0 on every path, including allocation failures.
 *
 * NOTE(review): the UdmRealloc() results used to grow Res->items are not
 * checked; a failure there still leaves Res->items NULL.
 */
int UdmPrepare(UDM_AGENT * query,UDM_RESULT *Res){
	UDM_CHARSET * browser_cs, * local_cs, *sys_int;
	int  ctype;
	int * ustr, * lt, * lex;
	size_t ulen;
	int word_match   = UdmVarListFindInt(&query->Conf->Vars,"wm",UDM_MATCH_FULL);
	const char * txt = UdmVarListFindStr(&query->Conf->Vars,"q","");
	const char * qlang = UdmVarListFindStr(&query->Conf->Vars, "g", NULL);
	char *ltxt;
	size_t i, wlen, clen, nsep, llen, nphrasecmd = 0;
	char *wrd, *clex;
	int *uwrd;
	UDM_CONV uni_lc, bc_uni;
	const char *lang;
	
	if ((wrd = (char*)UdmMalloc(query->Conf->WordParam.max_word_len * 12 + 1)) == NULL) return 0;
	if ((uwrd = (int*)UdmMalloc(sizeof(int) * (query->Conf->WordParam.max_word_len + 1))) == NULL) { UDM_FREE(wrd); return 0; }

	if (!(browser_cs = query->Conf->bcs)) {
		browser_cs=UdmGetCharSet("iso-8859-1");
	}
	if(!(local_cs = query->Conf->lcs)) {
		local_cs=UdmGetCharSet("iso-8859-1");
	}
	
	if (!(sys_int=UdmGetCharSet("sys-int"))) {
		UDM_FREE(uwrd); UDM_FREE(wrd);
		return 0;
	}
	
	UdmConvInit(&bc_uni,browser_cs,sys_int,UDM_RECODE_HTML);
	UdmConvInit(&uni_lc,sys_int,local_cs,UDM_RECODE_HTML);
	
	ulen=strlen(txt);
	ustr=(int*)(UdmMalloc((sizeof(int))*(ulen+1)));
	if (ustr == NULL) {
		UDM_FREE(uwrd); UDM_FREE(wrd);
		return 0;
	}
	UdmConv(&bc_uni,(char*)ustr,sizeof(ustr[0])*(ulen+1),txt,ulen+1);
	/* double quotes are treated like single quotes from here on */
	for(i = 0; ustr[i] && (i < ulen); i++) if (ustr[i] == '"') ustr[i] = '\'';
	
	/* Create copy of query, converted into LocalCharset (for UdmTrack) */
	llen = ulen * 14 + 1;
	ltxt=(char*)UdmMalloc(llen);
	if (ltxt == NULL) {
		UDM_FREE(ustr); UDM_FREE(uwrd); UDM_FREE(wrd);
		return 0;
	}
	UdmConv(&uni_lc,ltxt,llen,(char*)ustr,bc_uni.obytes);
	ltxt[uni_lc.obytes]='\0';
	UdmVarListReplaceStr(&query->Conf->Vars,"q",ltxt);  /* "q-lc" was here */
	UDM_FREE(ltxt);
	
	/* Parse query and build boolean search stack*/
	UdmUniStrToLower(ustr);
	switch(browser_cs->family) {
	case UDM_CHARSET_CHINESE_SIMPLIFIED:
	case UDM_CHARSET_CHINESE_TRADITIONAL: lang = "zh"; break;
	case UDM_CHARSET_JAPANESE: lang = "ja"; break;
	case UDM_CHARSET_THAI: lang = "th"; break;
	default: lang = "";
	}
	ustr = UdmUniSegment(query, ustr, lang);

	lex = UdmUniGetSepToken(ustr, &lt , &ctype);
	while(lex){
		wlen=lt-lex;
		/* uwrd holds at most max_word_len characters plus the terminator;
		   clen is the number of characters actually copied.  The conversion
		   below must use clen, not wlen, so it never reads past uwrd. */
		clen = udm_min(wlen, query->Conf->WordParam.max_word_len);
		memcpy(uwrd, lex, clen * sizeof(int));
		uwrd[clen] = 0;
		UdmConv(&uni_lc, wrd, query->Conf->WordParam.max_word_len * 12,(char*)uwrd, sizeof(uwrd[0])*(clen+1));
		clex = UdmTrim(wrd, " \t\r\n");
			
		if ((ctype != UDM_UNI_LETTER) && (ctype != UDM_UNI_CJK)) {
			/* separator token: every recognised character becomes one command */
			nsep = strlen(clex);
			for (i = 0; i < nsep; i++) {
				switch(clex[i]) {
				case '&':
				case '+':
					Res->items[Res->nitems].cmd = UDM_STACK_AND;
					break;
				case '|':
					Res->items[Res->nitems].cmd = UDM_STACK_OR;
					break;
				case '~':
					Res->items[Res->nitems].cmd = UDM_STACK_NOT;
					break;
				case '(':
					Res->items[Res->nitems].cmd = UDM_STACK_LEFT;
					break;
				case ')':
					Res->items[Res->nitems].cmd = UDM_STACK_RIGHT;
					break;
				case '"':
				case '\'':
					Res->items[Res->nitems].cmd = UDM_STACK_PHRASE;
					nphrasecmd++;
					break;
				default: continue;
				}
				Res->items[Res->nitems].arg=0;
				Res->nitems++;
				Res->ncmds++;
				if (Res->nitems >= Res->mitems) {
					Res->mitems += UDM_MAXSTACK;
					Res->items = (UDM_STACK_ITEM*)UdmRealloc(Res->items, Res->mitems * sizeof(UDM_STACK_ITEM));
				}
			}
		} else {
			int addwrd=1;

			Res->items[Res->nitems].cmd=UDM_STACK_WORD;
			Res->items[Res->nitems].arg = Res->WWList.nuniq;  /* 1L << (Res->WWList.nuniq); */
			Res->nitems++;
			if (Res->nitems >= Res->mitems) {
				Res->mitems += UDM_MAXSTACK;
				Res->items = (UDM_STACK_ITEM*)UdmRealloc(Res->items, Res->mitems * sizeof(UDM_STACK_ITEM));
			}

			if(word_match==UDM_MATCH_FULL){
				/* Check stopword only when full word         */
				/* Substring searches should not exclude them */
				if(UdmStopListFind(&query->Conf->StopWords, wrd, qlang) ||
						(query->Conf->WordParam.min_word_len>wlen)||
						(query->Conf->WordParam.max_word_len<wlen)){
					UDM_WIDEWORD OWord;
				
					OWord.len=strlen(wrd);
					OWord.order=Res->WWList.nuniq;
					OWord.count=0;
					OWord.crcword=UdmStrHash32(wrd);
					OWord.word=wrd;
					OWord.uword=uwrd;
					OWord.origin = UDM_WORD_ORIGIN_STOP;
					UdmWideWordListAdd(&Res->WWList, &OWord);
					addwrd=0;
				}
			}
			if(Res->WWList.nuniq >= UDM_MAXWORDPERQUERY-1){
				addwrd=0;
			}

			if (addwrd) {
				/* skip words that are already in the list */
				size_t nw;
				for (nw = 0; nw < Res->WWList.nwords; nw++) {
					if (UdmUniStrCmp(Res->WWList.Word[nw].uword, uwrd)) continue;
					addwrd = 0;
					break;
				}
			}

			if(addwrd){
				UDM_WIDEWORD OWord;
				UDM_WIDEWORDLIST * forms;
				
				OWord.len=strlen(wrd);
				OWord.order=Res->WWList.nuniq;
				OWord.count=0;
				OWord.crcword=UdmStrHash32(wrd);
				OWord.word=wrd;
				OWord.uword=uwrd;
				OWord.origin = UDM_WORD_ORIGIN_QUERY;

				UdmWideWordListAdd(&Res->WWList, &OWord);
				if((forms=UdmAllForms(query,&OWord))){
					UDM_WIDEWORD FWord;
					size_t frm;
					for(frm=0;frm<forms->nwords;frm++){
						/* wrd is reused as scratch space for each form */
						UdmConv(&uni_lc,wrd,12*query->Conf->WordParam.max_word_len,
							(char*)(forms->Word[frm].uword),
							sizeof(forms->Word[frm].uword[0])*(UdmUniLen(forms->Word[frm].uword)+1));
						FWord.len=strlen(wrd);
						FWord.order=Res->WWList.nuniq;
						FWord.count=0;
						FWord.crcword=UdmStrHash32(wrd);
						FWord.word=wrd;
						FWord.uword=forms->Word[frm].uword;
						FWord.origin = forms->Word[frm].origin;

						UdmWideWordListAdd(&Res->WWList,&FWord);
					}
					UdmWideWordListFree(forms);
					UDM_FREE(forms);
				}
				Res->WWList.nuniq++;
			}
		}
		lex = UdmUniGetSepToken(NULL, &lt, &ctype);
	}
	if (nphrasecmd & 1) {
		/* odd number of quotes: close the open phrase */
		Res->items[Res->nitems].cmd = UDM_STACK_PHRASE;
		Res->items[Res->nitems].arg=0;
		Res->nitems++;
		Res->ncmds++;
		if (Res->nitems >= Res->mitems) {
			Res->mitems += UDM_MAXSTACK;
			Res->items = (UDM_STACK_ITEM*)UdmRealloc(Res->items, Res->mitems * sizeof(UDM_STACK_ITEM));
		}
	}
	UDM_FREE(ustr); UDM_FREE(uwrd); UDM_FREE(wrd);
		
	return(0);
}

/*
 * Cosine of the angle between vectors R and D, scaled by 100000 and rounded
 * to the nearest integer.
 *
 * Elements taken into account: the first nadd entries, then for each of the
 * ns sections j and each of the nw words i the entry at index
 * nadd + j + 256 * i.
 *
 * Returns 0 when either vector has zero norm over those elements; the
 * division would otherwise produce NaN, and converting NaN to an unsigned
 * integer is undefined.
 */
static uint4 UdmCalcCosineWeight(unsigned int *R, unsigned int *D, size_t nw, size_t ns, size_t nadd) {
  double x = 0.0, y = 0.0, xy = 0.0;
  size_t i, j, c;
  double TR, TD;

  for (i = 0; i < nadd; i++) {
    TR = (double)R[i]; TD = (double)D[i];
    x += TR * TR;
    y += TD * TD;
    xy += TR * TD;
  }

  for (j = 0; j < ns; j++) {
    c = nadd + j;
    for (i = 0; i < nw; i++) {
      TR = (double)R[c]; TD = (double)D[c];
      x += TR * TR;
      y += TD * TD;
      xy += TR * TD;
      c += 256;  /* same section, next word */
    }
  }

  /* x and y are sums of squares, so they are never negative */
  if (x == 0.0 || y == 0.0) return 0;

  return (uint4)(100000.0 * xy / sqrt(x) / sqrt(y) + 0.5);
}

/* Weight bonus for a word origin: 3 for query words, 1 for spell forms, 0 otherwise. */
static int UdmOriginWeight(int origin) {
  if (origin == UDM_WORD_ORIGIN_QUERY) return 3;
  if (origin == UDM_WORD_ORIGIN_SPELL) return 1;
  return 0;
}

/*
 * Sort phr[0..num-1] by position (in place), then average the position gaps
 * between neighbouring records at each point where the word order value
 * changes.  Returns 0 when no such change occurs.
 */
static unsigned int CalcAverageWordDistance(UDM_PHR_DAT *phr, size_t num) {
  size_t idx, total = 0, changes = 0;
  size_t last_order;

  UdmSort((void*)phr, num, sizeof(*phr), (qsort_cmp)cmpphr);

  last_order = phr[0].order;
  for (idx = 1; idx < num; idx++) {
    if (phr[idx].order == last_order) continue;
    total += phr[idx].position - phr[idx - 1].position;
    last_order = phr[idx].order;
    changes++;
  }

  if (changes == 0) return 0;
  return total / changes;
}


/*
#define DEBUG_REL 1
*/

void UdmGroupByURL(UDM_AGENT *query,UDM_RESULT *Res){	
	UDM_STACK_ITEM *temp_items;
	size_t	i, j = 0, D_size, nitems = Res->nitems;
	size_t  *count, count_size = Res->WWList.nuniq * sizeof(size_t);
	int search_mode = UdmSearchMode(UdmVarListFindStr(&query->Conf->Vars, "m", "all"));
	size_t nsections = UdmVarListFindInt(&query->Conf->Vars, "NumSections", 256);
	UDM_URL_CRD *Crd = Res->CoordList.Coords;
	int wf[256], add_cmd = UDM_STACK_AND, inphrase;
	unsigned int *R, *D;
	UDM_PHR_DAT *phr;
	size_t phr_n = 0, phr_m = 256;
	size_t pharg, x, y, z, ph_start, ph_end;

	
	if(!Res->CoordList.ncoords) return;

#ifdef NEW_REL
  {
    urlid_t prev_id;
    size_t prev_num= 0;
    size_t N= 0;
    size_t M= Res->WWList.nuniq;
    size_t ndoc= 0; 
    
    float  TFi[UDM_MAXWORDPERQUERY+1];
    float   Ci[UDM_MAXWORDPERQUERY+1];
    float   Vi[UDM_MAXWORDPERQUERY+1];    
    float   Wi[UDM_MAXWORDPERQUERY+1];
    float   Di[UDM_MAXWORDPERQUERY+1];
    
    bzero(TFi, sizeof(TFi));
    bzero(Ci, sizeof(Ci));
    bzero(Vi, sizeof(Vi));
    bzero(Di, sizeof(Di));
    
    
    /* N - number of documents */
    for (prev_id= Crd[0].url_id, i=0; i<= Res->CoordList.ncoords; i++)
    {
      if ( (i==Res->CoordList.ncoords) || (prev_id != Crd[i].url_id))
      {
        N++;
        if (i < Res->CoordList.ncoords)
        {
          prev_id= Crd[i].url_id;
          prev_num= i;
        }
      }

#ifdef DEBUG_REL
      if (i < Res->CoordList.ncoords)
      {
        fprintf(stderr,"[%d]doc=%d sec=%d wrd=%d pos=%d\n",
          i,Crd[i].url_id,
          UDM_WRDSEC(Crd[i].coord),
          UDM_WRDNUM(Crd[i].coord),
          UDM_WRDPOS(Crd[i].coord));
      }
#endif
    }
    
    
    /* TFi */
    for (i=0; i < Res->CoordList.ncoords; i++)
    {
      TFi[UDM_WRDNUM(Crd[i].coord)]++;
    }
    
    /* Add fictious word */
    TFi[M]= ((float) Res->CoordList.ncoords) / M;
    M++;
    
    
    
    /* Ci */
    for (i=0; i < M; i++)
    {
       Ci[i]= ((float) TFi[i]) / N;
#ifdef DEBUG_REL
       fprintf(stderr,"[%d] TDi=%d Ci=%f\n",i,TFi[i], Ci[i]);
#endif
    }
    
    
    /* Vi */
    for (prev_id= Crd[0].url_id, i=0; i<= Res->CoordList.ncoords; i++)
    {
      size_t k;
      
      if ( (i==Res->CoordList.ncoords) || (prev_id != Crd[i].url_id))
      {
        Di[M-1]= TFi[M-1]/N; /* fictious word */
        for (k=0; k < M; k++)
        {
          Vi[k]+= (Di[k] - Ci[k]) * (Di[k] - Ci[k]);
        }
        bzero(Di, sizeof(Di));
        
        if (i < Res->CoordList.ncoords)
        {
          prev_id= Crd[i].url_id;
          prev_num= i;
        }
      }
      if (i < Res->CoordList.ncoords)
        Di[UDM_WRDNUM(Crd[i].coord)]++;
    }
    
    
    for (i=0; i < M; i++)
    {
      Vi[i]/= (N-1);
      Wi[i]= Ci[i] ? TFi[i]*Vi[i]/Ci[i]/Ci[i] : 0;
#ifdef DEBUG_REL
      fprintf(stderr,"[%d] Vi=%f Wi=%f\n", i, Vi[i],Wi[i]);
#endif
    }
    
    
    bzero(Di,sizeof(Di));
    for (prev_id= Crd[0].url_id, i=0; i<= Res->CoordList.ncoords; i++)
    {
      size_t k;
      
      if ( (i==Res->CoordList.ncoords) || (prev_id != Crd[i].url_id))
      {
        float sumWiDi= 0;
        float sumDi2= 0;
        float sumWi2= 0;
        float res;
        size_t n= 1;
        
        Di[M-1]= TFi[M-1]/N;  /* fictious word */
        
        for (k=0; k < M; k++)
        {
          sumWiDi+= Wi[k] * Di[k] / (float)M;
          sumDi2+=  Di[k] * Di[k] / (float)M;
          sumWi2+=  Wi[k] * Wi[k];
          if (k + 1 < M)
            n*= Di[k];
        }
        
        res= sumWiDi / sqrt(sumDi2 * sumWi2);
        Crd[ndoc].url_id= prev_id;
        Crd[ndoc].coord= res * 100000;
        
#ifdef DEBUG_REL
        fprintf(stderr,"RES[%d]=%f %f %f\n",
          prev_id, res, sumWiDi, sqrt(sumDi2 * sumWi2));
#endif 
        
        if (n)
          ndoc++;
        
        bzero(Di, sizeof(Di));
        if (i < Res->CoordList.ncoords)
        {
          prev_id= Crd[i].url_id;
          prev_num= i;
        }
      }
      
      if (i <= Res->CoordList.ncoords)
        Di[UDM_WRDNUM(Crd[i].coord)]++;
    }
    
    //fprintf(stderr,"  : %d\n", M);
    //fprintf(stderr,"  : %d\n", N);
    
    Res->CoordList.ncoords= ndoc;
    return;
    
  }
#endif

	UdmWeightFactorsInit(UdmVarListFindStr(&query->Conf->Vars, "wf", ""), wf);
	phr = (UDM_PHR_DAT*)UdmMalloc(phr_m * sizeof(UDM_PHR_DAT));
	count = (size_t*)UdmMalloc(count_size);
	D_size = (Res->WWList.nwords * 256 + 1) * sizeof(unsigned int) + 1;
	if ((R = (unsigned int*)UdmMalloc(D_size)) == NULL) return;
	if ((D = (unsigned int*)UdmMalloc(D_size)) == NULL) {
	  UDM_FREE(R);
	  return;
	}
	if ((temp_items = (UDM_STACK_ITEM*)UdmMalloc((Res->nitems + 1) * sizeof(UDM_STACK_ITEM))) == NULL) {
	  UDM_FREE(R); UDM_FREE(D);
	  return;
	}

	bzero((void*)D, D_size);
	bzero((void*)count, count_size);
	for(i = 0; i < Res->WWList.nwords * 256; i++) R[1 + i] = ((wf[i & 0xFF]) << 2) + UdmOriginWeight(Res->WWList.Word[i >> 8].origin);
	R[0] = 0;

	D[1 + 256 * UDM_WRDNUM(Crd[0].coord) + UDM_WRDSEC(Crd[0].coord)] = wf[UDM_WRDSEC(Crd[0].coord)];

	if (Res->ncmds > 0 || search_mode == UDM_MODE_BOOL) {
	  switch(search_mode) {
	  case UDM_MODE_ANY: add_cmd = UDM_STACK_OR; break;
	  case UDM_MODE_BOOL:
	  case UDM_MODE_ALL: add_cmd = UDM_STACK_AND; break;
	  }
	  j = 1;
	  temp_items[0] = Res->items[0];
	  if (Res->items[0].cmd == UDM_STACK_PHRASE) inphrase = 0; else inphrase = 1;

	  for (i = 1; i < Res->nitems; i++) {
	    if (Res->items[i].cmd == UDM_STACK_PHRASE) inphrase = (inphrase + 1) & 1;
	    if (inphrase && Res->items[i].cmd == UDM_STACK_WORD && temp_items[j].cmd == UDM_STACK_WORD) {
	      temp_items[j].cmd = add_cmd;
	      temp_items[j++].arg = 0;
	    }
	    temp_items[j++] = Res->items[i];
	  }
	  search_mode = UDM_MODE_BOOL;
	  nitems = j;
	}


	phr[phr_n].position = UDM_WRDPOS(Crd[0].coord);
	phr[phr_n].order = Res->WWList.Word[UDM_WRDNUM(Crd[0].coord)].order;
	count[phr[phr_n].order]++;
	phr_n++;

	j = 0;
	for(i = 1; i < Res->CoordList.ncoords; i++){
		/* Group by url_id */
		if(Crd[j].url_id==Crd[i].url_id){
		        /* Same document */

		        phr[phr_n].position = UDM_WRDPOS(Crd[i].coord);
			phr[phr_n].order = Res->WWList.Word[UDM_WRDNUM(Crd[i].coord)].order;
			count[phr[phr_n].order]++;
			if (++phr_n >= phr_m) {
			  phr_m += 256;
			  phr = (UDM_PHR_DAT*)UdmRealloc(phr, phr_m * sizeof(UDM_PHR_DAT));
			}

			D[1 + 256 * UDM_WRDNUM(Crd[i].coord) + UDM_WRDSEC(Crd[i].coord)] = 
			  (wf[UDM_WRDSEC(Crd[i].coord)] << 2) + UdmOriginWeight(Res->WWList.Word[UDM_WRDNUM(Crd[i].coord)].origin);
		}else{
			/* Next document */

		        switch(search_mode) {
			case UDM_MODE_BOOL:
/* [[[[[[[[[[[[[[ */
			        for(z = 0; z < nitems - 1; z++) {
				  if (temp_items[z].cmd == UDM_STACK_PHRASE) { /* we assume correct stack at this moment,
										  i.e. no empty phrases nor illegal operations order */
				    pharg = 0;
				    ph_start = z + 1;
				    for (ph_end = ph_start; ph_end < nitems && temp_items[ph_end].cmd == UDM_STACK_WORD; ph_end++);
				    if (phr_n >= (ph_end - ph_start))
				    for (y = 0; y <= phr_n - (ph_end - ph_start); y++) {
				      if (temp_items[ph_start].arg == phr[y].order) {
					pharg = 1;
					for (x = ph_start + 1; x < ph_end; x++) {
					  if (temp_items[x].arg != phr[y + x - ph_start].order 
					      || phr[y + x - ph_start - 1].position !=  (phr[y + x - ph_start ].position - 1)  ) {
					    pharg = 0;
					    break;
					  }
					}
					if (pharg) break;
				      }
				    }
				    for (y = ph_start; y < ph_end; y++) count[temp_items[y].arg] = pharg;
				    for (z++; z < nitems && (temp_items[z].cmd != UDM_STACK_PHRASE); z++);
				  }
				}
/* ]]]]]]]]]]]]]] */
				if(UdmCalcBoolItems(temp_items, nitems, count)){
				        D[0] = CalcAverageWordDistance(phr, phr_n);
				        Crd[j].coord = UdmCalcCosineWeight(R, D, Res->WWList.nwords, nsections, 1);
					j++;
				}
				break;
			case UDM_MODE_ALL:
			        for (z = 0; z < Res->WWList.nuniq; z++) if (count[z] == 0) break;
				if (z < Res->WWList.nuniq && count[z] == 0) break;
			default:
				        D[0] = CalcAverageWordDistance(phr, phr_n);
				        Crd[j].coord = UdmCalcCosineWeight(R, D, Res->WWList.nwords, nsections, 1);
					j++;
			}


			Crd[j]=Crd[i];

			bzero((void*)D, D_size);
			bzero((void*)count, count_size);
			phr_n = 0;
			phr[phr_n].position = UDM_WRDPOS(Crd[i].coord);
			phr[phr_n].order = Res->WWList.Word[UDM_WRDNUM(Crd[i].coord)].order;
			count[phr[phr_n].order]++;
			phr_n++;

			D[1 + 256 * UDM_WRDNUM(Crd[i].coord) + UDM_WRDSEC(Crd[i].coord)] = 
			  (wf[UDM_WRDSEC(Crd[i].coord)] << 2) + UdmOriginWeight(Res->WWList.Word[UDM_WRDNUM(Crd[i].coord)].origin);
		}
	}

	/* Check last word */

	D[0] = CalcAverageWordDistance(phr, phr_n);
	Crd[j].coord = UdmCalcCosineWeight(R, D, Res->WWList.nwords, nsections, 1);

	switch(search_mode) {
	case UDM_MODE_BOOL:
/* [[[[[[[[[[[[[[ */
			        for(z = 0; z < nitems - 1; z++) {
				  if (temp_items[z].cmd == UDM_STACK_PHRASE) { /* we assume correct stack at this moment,
										  i.e. no empty phrases nor illegal operations order */
				    pharg = 0;
				    ph_start = z + 1;
				    for (ph_end = ph_start; ph_end < nitems && temp_items[ph_end].cmd == UDM_STACK_WORD; ph_end++);
				    if (phr_n >= (ph_end - ph_start))
				    for (y = 0; y <= phr_n - (ph_end - ph_start); y++) {
				      if (temp_items[ph_start].arg == phr[y].order) {
					pharg = 1;
					for (x = ph_start + 1; x < ph_end; x++) {
					  if (temp_items[x].arg != phr[y + x - ph_start].order 
					      || phr[y + x - ph_start - 1].position !=  (phr[y + x - ph_start ].position - 1)  ) {
					    pharg = 0;
					    break;
					  }
					}
					if (pharg) break;
				      }
				    }
				    for (y = ph_start; y < ph_end; y++) count[temp_items[y].arg] = pharg;
				    for (z++; z < nitems && (temp_items[z].cmd != UDM_STACK_PHRASE); z++);
				  }
				}
/* ]]]]]]]]]]]]]] */
	  Res->CoordList.ncoords = (UdmCalcBoolItems(temp_items, nitems, count)) ? j + 1 : j;
	  break;
	case UDM_MODE_ALL:
	  for (z = 0; z < Res->WWList.nuniq; z++) if (count[z] == 0) break;
	  Res->CoordList.ncoords = (z < Res->WWList.nuniq && count[z] == 0) ? j : j + 1;
	  break;
	default:
	  Res->CoordList.ncoords = ( Crd[j].coord ) ? j + 1 : j;
	}

	UDM_FREE(temp_items);
	UDM_FREE(D);
	UDM_FREE(R);
	UDM_FREE(count);
	UDM_FREE(phr);
	return;
}


/*
 * Collapse runs of consecutive entries with the same site_id into their
 * first entry, in place (Coords and Data in parallel).  A new Res->PerSite
 * array receives the length of each run, and Res->CoordList.ncoords becomes
 * the number of runs.  Nothing is changed when the list is empty or the
 * PerSite allocation fails.
 */
void UdmGroupBySite(UDM_AGENT *query, UDM_RESULT *Res){	
	size_t	src, dst = 0, run_len = 1;
	urlid_t run_site;
	UDM_URL_CRD *Crd = Res->CoordList.Coords;
	UDM_URLDATA *Dat = Res->CoordList.Data;
	
	if (Res->CoordList.ncoords == 0) return;

	Res->PerSite = (size_t*)UdmMalloc(sizeof(*Res->PerSite) * Res->CoordList.ncoords);
	if (Res->PerSite == NULL) return;

	run_site = Dat[0].site_id;

	for (src = 1; src < Res->CoordList.ncoords; src++) {
		if (Dat[src].site_id == run_site) {
			/* same site: only the run length grows */
			run_len++;
			continue;
		}
		/* a new site starts: close the previous run and open the next one */
		Res->PerSite[dst] = run_len;
		run_len = 1;
		dst++;
		run_site = Dat[src].site_id;
		Crd[dst] = Crd[src];
		Dat[dst] = Dat[src];
	}
	Res->PerSite[dst] = run_len;
	Res->CoordList.ncoords = dst + 1;
}

/* Pair of 32-bit values: a count and a weight.
   NOTE(review): no use of this type is visible in this part of the file. */
typedef struct{
	/*char	phr_len;*/
	uint4	count;
	uint4	weight;
} UDM_PHR_PAR;

/*
 * Convert a category string into two pairs of 32-bit numbers.
 *
 * At most the first 12 characters of hex_str are used.  Despite the name,
 * the digits are parsed in base 36 (strtoul with radix 36), six digits per
 * number:
 *   hi / lo   - the string right-padded with '0' to 12 digits
 *   fhi / flo - the string right-padded with 'Z' to 12 digits
 * The second pair is only computed when both fhi and flo are non-NULL.
 */
void UdmDecodeHex8Str(const char *hex_str, uint4 *hi, uint4 *lo, uint4 *fhi, uint4 *flo){
	char padded[33], hi_part[17], lo_part[17];
	char *p;

	/* lower bound: pad with '0' */
	strncpy(padded, hex_str, 13);
	padded[12] = '\0';
	strcat(padded, "000000000000");
	/* leading zeros are blanked; strtoul skips the blanks */
	for (p = padded; *p == '0'; p++) *p = ' ';

	strncpy(hi_part, padded, 6);
	hi_part[6] = 0;
	strncpy(lo_part, padded + 6, 6);
	lo_part[6] = 0;

	*hi = (uint4)strtoul(hi_part, (char **)NULL, 36);
	*lo = (uint4)strtoul(lo_part, (char **)NULL, 36);

	if ((fhi == NULL) || (flo == NULL)) return;

	/* upper bound: pad with 'Z' */
	strncpy(padded, hex_str, 13);
	padded[12] = '\0';
	strcat(padded, "ZZZZZZZZZZZZ");

	strncpy(hi_part, padded, 6);
	hi_part[6] = 0;
	strncpy(lo_part, padded + 6, 6);
	lo_part[6] = 0;

	*fhi = strtoul(hi_part, (char **)NULL, 36);
	*flo = strtoul(lo_part, (char **)NULL, 36);
}

/*
 * Append a search limit to Agent->limits.
 *
 * type selects how val is turned into the hi/lo and f_hi/f_lo pairs:
 *   0 - decoded with UdmDecodeHex8Str()
 *   1 - all zero
 *   2 - hi = f_hi = atoi(val)
 *   3 - hi = f_hi = UdmStrHash32(val)
 * Any other type stores zeros (previously the values were left
 * uninitialised in that case).
 *
 * Returns 1 when the limit table is full, 0 on success.
 *
 * NOTE(review): file_name is copied with strcpy(); the size of the
 * destination field is not visible here, so callers must pass a name that
 * fits - confirm against the UDM_AGENT declaration.
 */
int __UDMCALL UdmAddSearchLimit(UDM_AGENT *Agent, int type, const char *file_name, const char *val){
	uint4 hi = 0, lo = 0, f_hi = 0, f_lo = 0;
	
	/* ">=" rather than "==" so an already overfull table is rejected too */
	if(Agent->nlimits >= MAX_SEARCH_LIMIT - 1) return(1);
	
	Agent->limits[Agent->nlimits].type = type;
	strcpy(Agent->limits[Agent->nlimits].file_name, file_name);
	switch(type){
		case 0: UdmDecodeHex8Str(val, &hi, &lo, &f_hi, &f_lo); break;
		case 1: f_hi = hi = 0; f_lo = lo = 0; break;
		case 2: hi=atoi(val); lo=0; f_hi = hi; f_lo = lo; break;
		case 3: hi=UdmStrHash32(val); lo = 0; f_hi = hi; f_lo = 0; break;
		default: break;  /* unknown type: keep the zero defaults */
	}	
	Agent->limits[Agent->nlimits].hi = hi;
	Agent->limits[Agent->nlimits].lo = lo;
	Agent->limits[Agent->nlimits].f_hi = f_hi;
	Agent->limits[Agent->nlimits].f_lo = f_lo;
	
	Agent->nlimits++;

	UdmLog(Agent, UDM_LOG_DEBUG, "val: %s  %x %x   %x %x", val, hi, lo, f_hi, f_lo);
	
	return(0);
}

/*
 * Split a CGI query string into name=value pairs and store them in vars.
 *
 * For every pair the unescaped value is stored under both "name" and
 * "query.name".  If vars contains a "Limit-name" definition of the form
 * "type:..." with a known type (category, tag, time, hostname, language,
 * content, siteid) and the value is not empty, a search limit is added to
 * Agent with UdmAddSearchLimit().
 *
 * The limit definition is parsed from its own copy.  It used to be copied
 * into the scratch buffer with strncpy() bounded by the query-string
 * length, which truncated the definition for short query strings and could
 * leave it unterminated.
 *
 * Returns 0 on success, 1 on allocation failure.
 */
int UdmParseQueryString(UDM_AGENT * Agent,UDM_VARLIST * vars,char * query_string){
	char * tok, *lt;
	size_t len;
	/* room for the longest unescaped value and for "Limit-" + name + NUL */
	char *str = (char *)UdmMalloc((len = strlen(query_string)) + 7);
	char *qs = (char*)UdmStrdup(query_string);
	char qname[256];

	if ((str == NULL) || qs == NULL) {
	  UDM_FREE(str);
	  UDM_FREE(qs);
	  return 1;
	}

	UdmSGMLUnescape(qs);
	
	tok = udm_strtok_r(qs, "&", &lt);
	while(tok){
		char empty[]="";
		char * val;
		const char * lim;
		
		if((val=strchr(tok,'='))){
			*val='\0';
			val++;
		}else{
			val=empty;
		}
		UdmUnescapeCGIQuery(str,val);
		UdmVarListReplaceStr(vars,tok,str);
		udm_snprintf(qname, 256, "query.%s", tok);
		UdmVarListReplaceStr(vars, qname, str);

		sprintf(str,"Limit-%s",tok);
		if((lim=UdmVarListFindStr(vars,str,NULL))){
			int ltype=0;
			const char * type, * fname = NULL;
			char * llt;
			char * limdef = (char*)UdmStrdup(lim);

			if (limdef == NULL) {
			  UDM_FREE(str);
			  UDM_FREE(qs);
			  return 1;
			}
			
			if((type = udm_strtok_r(limdef, ":", &llt))) {
			  if(!strcasecmp(type, "category")) {
			    ltype = UDM_LIMTYPE_NESTED; fname = UDM_LIMFNAME_CAT;
			  } else
			  if(!strcasecmp(type, "tag")) {
			    ltype = UDM_LIMTYPE_LINEAR_CRC; fname = UDM_LIMFNAME_TAG;
			  } else
			  if(!strcasecmp(type, "time")) {
			    ltype = UDM_LIMTYPE_TIME; fname = UDM_LIMFNAME_TIME;
			  } else
			  if(!strcasecmp(type, "hostname")) {
			    ltype = UDM_LIMTYPE_LINEAR_CRC; fname = UDM_LIMFNAME_HOST;
			  } else
			  if(!strcasecmp(type, "language")) {
			    ltype = UDM_LIMTYPE_LINEAR_CRC; fname = UDM_LIMFNAME_LANG;
			  } else
			  if(!strcasecmp(type, "content")) {
			    ltype = UDM_LIMTYPE_LINEAR_CRC; fname = UDM_LIMFNAME_CTYPE;
			  } else
			  if(!strcasecmp(type, "siteid")) {
			    ltype = UDM_LIMTYPE_LINEAR_INT; fname = UDM_LIMFNAME_SITE;
			  }
			  /* val is the raw (still escaped) value from the query string */
			  if((fname != NULL) && strlen(val)) {
			    UdmAddSearchLimit(Agent,ltype,fname,val);
			  }
			}
			UDM_FREE(limdef);
		}
		tok = udm_strtok_r(NULL, "&", &lt);
	}
	
	UDM_FREE(str);
	UDM_FREE(qs);
	return 0;
}


/*
 * Convert src from charset lcs to charset bcs and mark every token that
 * equals (case-insensitively) one of the words in List: a marked token is
 * preceded by the byte '\2' and followed by '\3'.  List may be NULL, in
 * which case nothing is marked.
 *
 * Returns a newly allocated string that the caller must free, or NULL when
 * src is NULL or empty, when the "sys-int" charset is not available, or
 * when an allocation fails.
 */
char * UdmHlConvert(UDM_WIDEWORDLIST *List,const char * src, UDM_CHARSET * lcs, UDM_CHARSET * bcs) {
	int		*tok, *lt, ctype, *uni;
	char		*hpart, *htxt;
	size_t		len, hsize;
	UDM_CONV	lc_uni, uni_bc;
	UDM_CHARSET	*sys_int;
	
	if(!src)return NULL;
	
	if ((len = strlen(src)) == 0) return NULL;
	if ((sys_int = UdmGetCharSet("sys-int")) == NULL) return NULL;

	hsize = len * 14 + 10;
	hpart = (char*)UdmMalloc(hsize);
	htxt = (char*)UdmMalloc(hsize);
	uni = (int *)UdmMalloc((len + 10) * sizeof(int));
	if (hpart == NULL || htxt == NULL || uni == NULL) {
		UDM_FREE(uni);
		UDM_FREE(htxt);
		UDM_FREE(hpart);
		return NULL;
	}
	htxt[0]='\0';
	
	UdmConvInit(&lc_uni, lcs, sys_int, UDM_RECODE_HTML);
	UdmConvInit(&uni_bc,sys_int,bcs,UDM_RECODE_HTML);
	
	/* Convert to unicode */
	UdmConv(&lc_uni,(char*)uni,sizeof(uni[0])*(len+10),src,len+1);
	
	/* Parse unicode string */
	tok = UdmUniGetSepToken(uni, &lt, &ctype);
	while(tok){
		int found=0;
		size_t slen,flen;
		int euchar;
		size_t uw;

		flen=lt-tok;

		/* Convert token to BrowserCharset */
		euchar=tok[flen];
		tok[flen]=0;
		hpart[0]='\0';
		
		/* Only flen characters (no terminator) are converted, so the
		   output is terminated explicitly at the number of bytes written;
		   one byte of the buffer is kept back for that terminator. */
		UdmConv(&uni_bc, hpart, hsize - 1, (char*)tok, sizeof(*tok) * flen);
		hpart[uni_bc.obytes]='\0';
		
		tok[flen]=euchar;

		/* Check that it is word to be marked */
		if (List != NULL) {
		  for(uw=0;uw<List->nwords;uw++){
			slen = List->Word[uw].ulen;
			if(flen == slen && !UdmUniStrNCaseCmp(tok,List->Word[uw].uword,flen)){
				found=1;
				break;
			}
		  }
		}
		if(found)strcat(htxt,"\2");
		strcat(htxt,hpart);
		if(found)strcat(htxt,"\3");
		
		tok = UdmUniGetSepToken(NULL, &lt, &ctype);
	}
	UDM_FREE(hpart);
	UDM_FREE(uni);
	return(htxt);
}

/*
 * Convert the search result from charset lcs to charset bcs:
 *   - every word of Res->WWList is replaced by its converted form;
 *   - every section of every result document gets a highlighted value
 *     (UdmHlConvert) in val and a plain conversion in txt_val;
 *   - every variable of Conf->Vars gets an HTML-recoded value in val and a
 *     plain conversion in txt_val.
 * The previous strings are freed.  Always returns UDM_OK.
 */
int UdmConvert(UDM_ENV *Conf, UDM_RESULT *Res, UDM_CHARSET *lcs, UDM_CHARSET *bcs){
	size_t		n, srclen, bufsize;
	UDM_CONV	lc_bc, lc_bc_text, bc_bc;
	char            *html, *plain;
	UDM_VAR         *Var;
	
	UdmConvInit(&lc_bc,lcs,bcs,UDM_RECODE_HTML);
	UdmConvInit(&lc_bc_text, lcs, bcs, 0);
	UdmConvInit(&bc_bc,bcs, bcs, UDM_RECODE_HTML);

	/* Word list */
	for (n = 0; n < Res->WWList.nwords; n++) {
		UDM_WIDEWORD	*Word = &Res->WWList.Word[n];

		srclen = strlen(Word->word);
		bufsize = srclen * 12 + 1;
		html = (char*)UdmMalloc(bufsize);
		UdmConv(&lc_bc, html, bufsize, Word->word, srclen + 1);
		UDM_FREE(Word->word);
		Word->word = html;
	}
	
	/* Document sections */
	for (n = 0; n < Res->num_rows; n++) {
		UDM_DOCUMENT	*Doc = &Res->Doc[n];
		size_t		s;
		
		for (s = 0; s < Doc->Sections.nvars; s++) {
			Var = &Doc->Sections.Var[s];

			srclen = strlen(Var->val);
			bufsize = srclen * 12 + 1;
			plain = (char*)UdmMalloc(bufsize);

			UdmConv(&lc_bc_text, plain, bufsize, Var->val, srclen + 1);
			html = UdmHlConvert(&Res->WWList, Var->val, lcs, bcs);
			UDM_FREE(Var->val);
			UDM_FREE(Var->txt_val);
			Var->val = html;
			Var->txt_val = plain;
		}
	}

	/* Env->Vars.  FIXME kept from the original: charsets of different UDM_DB
	   sources need unification; bc_bc is prepared above but not used. */
	for (n = 0; n < Conf->Vars.nvars; n++) {
		Var = &Conf->Vars.Var[n];

		srclen = strlen(Var->val);
		bufsize = srclen * 12 + 1;
		plain = (char*)UdmMalloc(bufsize);
		html = (char*)UdmMalloc(bufsize);

		UdmConv(&lc_bc, html, bufsize, Var->val, srclen + 1);
		UdmConv(&lc_bc_text, plain, bufsize, Var->val, srclen + 1);
		UDM_FREE(Var->val);
		UDM_FREE(Var->txt_val);
		Var->val = html;
		Var->txt_val = plain;
	}

	return UDM_OK;
}

/*
 * Return a newly allocated copy of s with the highlight marker bytes
 * '\2' and '\3' removed.  The caller frees the result.
 * Returns NULL when the allocation fails.
 */
char* UdmRemoveHiLightDup(const char *s){
	size_t	len=strlen(s)+1;
	char	*res = (char*)UdmMalloc(len);
	char	*d;
	
	if (res == NULL) return NULL;

	for(d=res;s[0];s++){
		switch(s[0]){
			case '\2':
			case '\3':
				break;
			default:
				*d++=*s;
		}
	}
	*d='\0';
	return res;
}



/*
 * Serialize the categories of C into textbuf, one line per category:
 *
 *   <CAT\tid="..."\tpath="..."\tlink="..."\tname="...">\r\n
 *
 * C        category list to write out
 * textbuf  destination buffer
 * len      size of textbuf in bytes, terminator included
 *
 * Output that does not fit is cut off by udm_snprintf; once only the
 * terminator fits, the remaining categories are not formatted at all.
 * Nothing is written when textbuf is NULL or len is 0.
 * Always returns UDM_OK.
 */
int UdmCatToTextBuf(UDM_CATEGORY *C, char *textbuf, size_t len) {
	char	*end;
	size_t  i;
	
	/* Not even the terminator can be stored */
	if (textbuf == NULL || len == 0) return UDM_OK;

	textbuf[0]='\0';
	end = textbuf;
	
	for(i = 0; i < C->ncategories; i++) {
	  /* end always points at the current terminator, so no rescan is needed */
	  size_t room = len - (size_t)(end - textbuf);

	  if (room <= 1) break;

	  udm_snprintf(end, room, "<CAT\tid=\"%d\"\tpath=\"%s\"\tlink=\"%s\"\tname=\"%s\">\r\n",
		   C->Category[i].rec_id, C->Category[i].path, C->Category[i].link, C->Category[i].name );
	  end = end + strlen(end);
	}
	return UDM_OK;
}

/*
 * Parse one "<CAT ...>" tag from textbuf and append it to C as a new
 * category.
 *
 * The first token returned by UdmHTMLToken() must be a tag; its
 * attributes id, path, link and name fill the matching fields of the
 * new item, other attributes are ignored.  String values longer than
 * the destination field are truncated and always NUL-terminated.
 *
 * Returns UDM_OK in every case, including when textbuf is NULL, when
 * it does not start with a tag, and when the list cannot be enlarged
 * (C is then left unchanged).
 */
int UdmCatFromTextBuf(UDM_CATEGORY *C, char *textbuf) {
	const char	*htok, *last;
	UDM_HTMLTOK	tag;
	UDM_CATITEM	*grown;
	size_t		i, c;
	
	if (textbuf == NULL) return UDM_OK;
	UdmHTMLTOKInit(&tag);
	
	htok=UdmHTMLToken(textbuf,&last,&tag);
	
	if(!htok || tag.type != UDM_HTML_TAG)
	  return UDM_OK;

	/* Grow through a temporary so the existing array survives a failure */
	c = C->ncategories;
	grown = (UDM_CATITEM*)UdmRealloc(C->Category, sizeof(UDM_CATITEM) * (c + 1));
	if (grown == NULL) {
	  /* NOTE(review): no failure code is visible in this part of the
	     file; return the project's error code here if one exists. */
	  return UDM_OK;
	}
	C->Category = grown;

	/* Zero the new item: unset fields stay empty, and the last byte of
	   each string field remains a terminator after the bounded copies. */
	memset(&C->Category[c], 0, sizeof(UDM_CATITEM));
	
	/* Token 0 is skipped; the attributes are read from token 1 onwards */
	for(i = 1; i < tag.ntoks; i++){
		size_t	nlen = tag.toks[i].nlen;
		size_t	vlen = tag.toks[i].vlen;
		char	*name = UdmStrndup(tag.toks[i].name, nlen);
		char	*data = UdmStrndup(tag.toks[i].val, vlen);

		/* Skip the attribute if either copy could not be made */
		if (name != NULL && data != NULL) {
		  if (!strcmp(name, "id")) {
		    C->Category[c].rec_id = atoi(data);
		  } else if (!strcmp(name, "path")) {
		    strncpy(C->Category[c].path, data, sizeof(C->Category[c].path) - 1);
		  } else if (!strcmp(name, "link")) {
		    strncpy(C->Category[c].link, data, sizeof(C->Category[c].link) - 1);
		  } else if (!strcmp(name, "name")) {
		    strncpy(C->Category[c].name, data, sizeof(C->Category[c].name) - 1);
		  }
		}

		UDM_FREE(name);
		UDM_FREE(data);
	}

	C->ncategories++;
	return UDM_OK;
}

/*
 * Split a zero-terminated string of int code units ("sys-int"
 * charset) into words for languages written without word separators.
 *
 * Indexer  agent; supplies the locks and the Conf->Chi / Conf->Thai
 *          (and, with MECAB, Conf->mecab) segmenter data
 * ustr     string to segment; this function takes it over and may
 *          reallocate or free it
 * lang     language code, or NULL for "unknown"
 *
 * Returns the string to use from now on, which may or may not be the
 * pointer passed in; the caller must continue with the return value
 * only.  If a segmenter or an allocation fails, the string is
 * returned unsegmented.
 *
 * Selection, compared case-insensitively on the first two characters:
 *   "ja"  ChaSen through EUC-JP (CHASEN) and/or MeCab through SJIS
 *         (MECAB), when compiled in
 *   "zh"  UdmSegmentByFreq() with Conf->Chi
 *   "th"  UdmSegmentByFreq() with Conf->Thai
 * When CHASEN or MECAB is compiled in, the "zh" and "th" tests hang
 * off the Japanese test with `else`, so at most one of them runs and a
 * NULL lang selects Japanese only.  Without them, "zh" and "th" are
 * independent tests and a NULL lang runs both.
 */
int *UdmUniSegment(UDM_AGENT *Indexer, int *ustr, const char *lang) {
	/* FIXME: the tis-620 and gb2312 converters are set up below but
	   not used anywhere in this function; remove if not needed. */
	UDM_CHARSET     *tis_cs;
	UDM_CONV        uni_tis, tis_uni;
#ifdef CHASEN
	UDM_CHARSET     *eucjp_cs;
	UDM_CONV        uni_eucjp, eucjp_uni;
	char            *eucstr, *eucstr_seg;
#endif
#ifdef MECAB
	UDM_CHARSET     *sjis_cs;
	UDM_CONV        uni_sjis, sjis_uni;
	char            *sjisstr, *sjisstr_seg;
#endif
#ifdef HAVE_CHARSET_gb2312
	UDM_CHARSET     *gb_cs;
	UDM_CONV        uni_gb, gb_uni;
#endif
	UDM_CHARSET	*sys_int;
	size_t          reslen, dstlen = UdmUniLen(ustr);


	sys_int = UdmGetCharSet("sys-int");

	tis_cs = UdmGetCharSet("tis-620");
	UdmConvInit(&tis_uni, tis_cs, sys_int, UDM_RECODE_HTML);
	UdmConvInit(&uni_tis, sys_int, tis_cs, UDM_RECODE_HTML);
	
#ifdef CHASEN
	eucjp_cs = UdmGetCharSet("euc-jp");
	if (!eucjp_cs) eucjp_cs = UdmGetCharSet("sys-int");
	UdmConvInit(&uni_eucjp, sys_int, eucjp_cs, UDM_RECODE_HTML);
	UdmConvInit(&eucjp_uni, eucjp_cs, sys_int, UDM_RECODE_HTML);
#endif
#ifdef MECAB
	sjis_cs = UdmGetCharSet("sjis");
	if (!sjis_cs) sjis_cs = UdmGetCharSet("sys-int");
	UdmConvInit(&uni_sjis, sys_int, sjis_cs, UDM_RECODE_HTML);
	UdmConvInit(&sjis_uni, sjis_cs, sys_int, UDM_RECODE_HTML);
#endif
#ifdef HAVE_CHARSET_gb2312
	gb_cs = UdmGetCharSet("gb2312");
	UdmConvInit(&uni_gb, sys_int, gb_cs, UDM_RECODE_HTML);
	UdmConvInit(&gb_uni, gb_cs, sys_int, UDM_RECODE_HTML);
#endif


#ifdef CHASEN

	if (lang == NULL || !strncasecmp(lang, "ja", 2)) {
	  eucstr = (char*)UdmMalloc(12 * dstlen + 1);
	  if (eucstr != NULL) {
	    /* Source length is in bytes, as in the MECAB branch: dstlen
	       counts int units, so it is scaled by sizeof(*ustr). */
	    UdmConv(&uni_eucjp, eucstr, 12 * dstlen + 1, (char*)ustr, sizeof(*ustr) * (dstlen + 1));

	    UDM_GETLOCK(Indexer, UDM_LOCK_SEGMENTER);
	    eucstr_seg = chasen_sparse_tostr(eucstr);
	    UDM_RELEASELOCK(Indexer, UDM_LOCK_SEGMENTER);

	    if (eucstr_seg != NULL) {
	      int *grown;

	      /* One int per output byte (terminator included) is reserved */
	      reslen = strlen(eucstr_seg) + 1;
	      grown = (int*)UdmRealloc(ustr, reslen * sizeof(int));
	      if (grown != NULL) {
	        ustr = grown;
	        UdmConv(&eucjp_uni, (char*)ustr, reslen * sizeof(int), eucstr_seg, reslen);
	      }
	    }
	    UDM_FREE(eucstr);
	  }
	  /* Refreshed because the MECAB branch, if compiled in, sizes its buffer from it */
	  dstlen = UdmUniLen(ustr);
	}
#endif
#ifdef MECAB
	if (lang == NULL || !strncasecmp(lang, "ja", 2)) {
	  sjisstr = (char*)UdmMalloc(12 * dstlen + 1);
	  if (sjisstr != NULL) {
	    UdmConv(&uni_sjis, sjisstr, 12 * dstlen + 1, (char*)ustr, sizeof(*ustr) * (dstlen + 1));

	    UDM_GETLOCK(Indexer, UDM_LOCK_CONF);
#ifdef HAVE_PTHREADS
	    mecab_lock(Indexer->Conf->mecab);
#endif
	    sjisstr_seg = mecab_sparse_tostr(Indexer->Conf->mecab, sjisstr);
#ifdef HAVE_PTHREADS
	    mecab_unlock(Indexer->Conf->mecab);
#endif
	    UDM_RELEASELOCK(Indexer, UDM_LOCK_CONF);

	    if (sjisstr_seg != NULL) {
	      int *grown;

	      reslen = strlen(sjisstr_seg) + 1;
	      grown = (int*)UdmRealloc(ustr, reslen * sizeof(int));
	      if (grown != NULL) {
	        ustr = grown;
	        UdmConv(&sjis_uni, (char*)ustr, reslen * sizeof(int), sjisstr_seg, reslen);
	      }
	    }
	    UDM_FREE(sjisstr);
	  }
	  dstlen = UdmUniLen(ustr);
	}
#endif
#if defined(CHASEN) || defined(MECAB)
	else
#endif
	  if (lang == NULL || !strncasecmp(lang, "zh", 2)) {
	    int *seg_ustr;
	    UDM_GETLOCK(Indexer, UDM_LOCK_CONF);
	    seg_ustr = UdmSegmentByFreq(&Indexer->Conf->Chi, ustr);
	    UDM_RELEASELOCK(Indexer, UDM_LOCK_CONF);
	    /* A NULL result leaves the input string in place */
	    if (seg_ustr != NULL) {
	      UDM_FREE(ustr);
	      ustr = seg_ustr;
	    }
	    dstlen = UdmUniLen(ustr);
	  }
	
#if defined(CHASEN) || defined(MECAB)
	  else
#endif
	    if (lang == NULL || !strncasecmp(lang, "th", 2)) {
	      int *seg_ustr;
	      UDM_GETLOCK(Indexer, UDM_LOCK_CONF);
	      seg_ustr = UdmSegmentByFreq(&Indexer->Conf->Thai, ustr);
	      UDM_RELEASELOCK(Indexer, UDM_LOCK_CONF);
	      if (seg_ustr != NULL) {
		UDM_FREE(ustr);
		ustr = seg_ustr;
	      }
	      dstlen = UdmUniLen(ustr);
	    }

	return ustr;
}


