/* Copyright (C) 2000-2006 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_WINSOCK_H
#include <winsock.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_ARPA_NAMESER_H
#include <arpa/nameser.h>
#endif
#ifdef HAVE_RESOLV_H
#include <resolv.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#include <assert.h>


#include "udm_common.h"
#include "udm_db.h"
#include "udm_db_int.h"
#include "udm_sqldbms.h"
#include "udm_utils.h"
#include "udm_url.h"
#include "udm_sdp.h"
#include "udm_vars.h"
#include "udm_mutex.h"
#include "udm_searchtool.h"
#include "udm_result.h"
#include "udm_log.h"
#include "udm_agent.h"
#include "udm_proto.h"
#include "udm_host.h"
#include "udm_hash.h"
#include "udm_doc.h"
#include "udm_services.h"
#include "udm_xmalloc.h"
#include "udm_searchcache.h"
#include "udm_store.h"
#include "udm_match.h"
#include "udm_word.h"

/*
  Call the optional ThreadInfo callback stored in A->Conf, passing
  A, s and m through unchanged. Nothing happens when no callback is set.
  NOTE(review): this expands to a bare "if" statement, so an "else"
  written right after a use of this macro would bind to this "if".
*/
#define UDM_THREADINFO(A,s,m)	if(A->Conf->ThreadInfo)A->Conf->ThreadInfo(A,s,m)


/*
  Initialize a UDM_DB structure.

  vdb - an existing UDM_DB to (re)initialize, or NULL to allocate
        a new one on the heap.

  The structure is zero-filled, numtables is set to 1 and, when built
  with SQL support, the word cache is initialized. A structure allocated
  here gets freeme=1 so that UdmDBFree() releases it.

  Returns the initialized structure, or NULL if vdb was NULL and
  the allocation failed.
*/
void *UdmDBInit(void *vdb)
{
  UDM_DB *db=vdb;
  size_t  nbytes=sizeof(UDM_DB);
  
  if(!db)
  {
    db=(UDM_DB*)UdmMalloc(nbytes);
    /* Do not touch the memory when the allocation failed */
    if(!db)
      return NULL;
    bzero((void*)db, nbytes);
    db->freeme=1;
  }
  else
  {
    bzero((void*)db, nbytes);
  }
  db->numtables=1;

#if HAVE_SQL
  UdmWordCacheInit(&db->WordCache);
#endif
 
  return db;
}


/*
  Release everything owned by a UDM_DB structure.

  vdb - structure previously set up by UdmDBInit(); NULL is accepted
        and ignored.

  Frees DBName, where and from, closes the SQL connection if one is
  open (SQL builds only), frees the variable list, and finally frees
  the structure itself when it was heap-allocated (freeme is set).
*/
void UdmDBFree(void *vdb)
{
  UDM_DB  *db=vdb;

  /* Nothing to release for a NULL handle */
  if(!db)
    return;

  UDM_FREE(db->DBName);
  UDM_FREE(db->where);
  UDM_FREE(db->from);
  
#if HAVE_SQL
  if (db->connected) UdmSQLClose(db);
#endif
  
  UdmVarListFree(&db->Vars);
  if(db->freeme)UDM_FREE(vdb);
  return;
}


/*
int UdmURLData(UDM_ENV *Conf, UDM_URLDATALIST *L, UDM_DB *db)
{
  int  res=UDM_OK;
  
  L->nitems=0;
  
#ifdef HAVE_SQL
  res=UdmURLDataSQL(Conf, L, db);
#endif
  return res;
}
*/

/*
  Clear every database configured in A->Conf->dbl.

  For each database (SQL builds only) UdmClearDBSQL() is called and
  db->where is released afterwards. Processing stops at the first
  database that does not return UDM_OK.

  Returns UDM_OK on success. On failure the error text of the database
  that failed is copied into A->Conf->errstr. UDM_ERROR is also returned
  when no database is configured (or when built without SQL support).
*/
__C_LINK int __UDMCALL UdmClearDatabase(UDM_AGENT *A)
{
  int  res=UDM_ERROR;
  UDM_DB  *db= NULL;
  size_t i, dbto =  A->Conf->dbl.nitems;

  for (i = 0; i < dbto; i++)
  {
    db = &A->Conf->dbl.db[i];
#ifdef HAVE_SQL
    res = UdmClearDBSQL(A, db);
    UDM_FREE(db->where);          /* clear db->where for next parameters */
#endif
    if (res != UDM_OK) break;
  }
  /*
    db stays NULL when the list of databases is empty: there is
    no error text to copy in that case.
    NOTE(review): strcpy() assumes A->Conf->errstr is at least as
    large as db->errstr - confirm against the structure declarations.
  */
  if(res!=UDM_OK && db)
  {
    strcpy(A->Conf->errstr,db->errstr);
  }
  return res;
}


/*
  NEWS extension helper: link a message to its parent message.

  The parent Message-ID is taken from the "Header.References" section:
  the references are space separated and the direct parent is the last
  one (or the only one). When the ID is not empty and contains '@',
  the parent is looked up with UDM_URL_ACTION_FINDBYMSG and its "ID"
  is stored into the document as "Header.Parent-ID". If a non-zero
  parent ID was found, the document is registered as its child with
  UDM_URL_ACTION_REGCHILD.

  Returns the result of the last URL action performed,
  or UDM_OK when no action was necessary.
*/
static int
UdmRegisterChild(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc)
{
  int         rc= UDM_OK;
  int         parent_rec_id= 0;
  const char *parent_msgid= NULL;
  UDM_VAR    *References;

  UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_CONF);

  References= UdmVarListFind(&Doc->Sections, "Header.References");
  if (References && References->val)
  {
    /* The direct parent follows the last space, if there is any */
    const char *last_space= strrchr(References->val, ' ');
    parent_msgid= last_space ? last_space + 1 : References->val;
  }

  /* Look the parent up in the database */
  if (parent_msgid && parent_msgid[0] != '\0' && strchr(parent_msgid, '@'))
  {
    UDM_DOCUMENT ParentDoc;

    UdmDocInit(&ParentDoc);
    UdmVarListReplaceStr(&ParentDoc.Sections, "Header.Message-ID", parent_msgid);
    rc= UdmURLActionNoLock(Indexer, &ParentDoc, UDM_URL_ACTION_FINDBYMSG);
    parent_rec_id= UdmVarListFindInt(&ParentDoc.Sections, "ID", 0);
    UdmVarListReplaceInt(&Doc->Sections, "Header.Parent-ID", parent_rec_id);
    UdmDocFree(&ParentDoc);
  }

  /* Without a known parent there is nothing to register */
  if (parent_rec_id == 0)
    return rc;

  return UdmURLActionNoLock(Indexer, Doc, UDM_URL_ACTION_REGCHILD);
}


/*
  Write the outcome of processing one document into the database.

  The URL action is chosen from Doc->method and the "Status" section
  (HTTP status code). Unless the document is deleted or an earlier
  action fails, the "Next-Index-Time" section is set and the chosen
  action is executed through UdmURLActionNoLock().

  The caller must own UDM_LOCK_CONF.
  Returns the result code of the last URL action executed.
*/
static int DocUpdate(UDM_AGENT * Indexer, UDM_DOCUMENT *Doc)
{
  int        rc= UDM_OK;
  int        status=UdmVarListFindInt(&Doc->Sections,"Status",0);
  urlid_t    url_id = (urlid_t)UdmVarListFindInt(&Doc->Sections, "ID", 0);
  /* Default: re-index after the regular period; error paths override it */
  time_t     next_index_time= time(NULL) + Doc->Spider.period;
  char       dbuf[64];
  int        use_newsext; 
  int        action;

  UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_CONF);

  use_newsext= !strcasecmp(UdmVarListFindStr(&Indexer->Conf->Vars,"NewsExtensions","no"),"yes");

  /* "Visit later": only the next index time is refreshed */
  if (Doc->method == UDM_METHOD_VISITLATER)
  {
    action= UDM_URL_ACTION_SUPDATE;
    goto update;
  }
  
  /* First of all check whether the URL must be deleted */
  if(Doc->method==UDM_METHOD_DISALLOW)
  {
    UdmLog(Indexer,UDM_LOG_ERROR,"Deleting %s", UdmVarListFindStr(&Doc->Sections, "URL", ""));
    rc= UdmURLActionNoLock(Indexer, Doc, UDM_URL_ACTION_DELETE);
    return rc;
  }

  switch(status){
  
  case 0: /* No HTTP code */
    /* Count it as a host error and retry after the network error delay */
    UdmHostErrorIncrement(&Indexer->Conf->Hosts, Doc->connp.hostname);
    UdmLog(Indexer,UDM_LOG_ERROR,"No HTTP response status");
    next_index_time=time(NULL)+Doc->Spider.net_error_delay_time;
    action= UDM_URL_ACTION_SUPDATE;
    break;
  
  case UDM_HTTP_STATUS_OK:                            /* 200 */
  case UDM_HTTP_STATUS_PARTIAL_OK:                    /* 206 */
    if(!UdmVarListFind(&Doc->Sections,"Content-Type"))
    {
      /* A response without Content-Type is recorded as a server error (500) */
      UdmLog(Indexer,UDM_LOG_ERROR,"No Content-type header");
      next_index_time=time(NULL)+Doc->Spider.net_error_delay_time;
      UdmVarListReplaceInt(&Doc->Sections,"Status",UDM_HTTP_STATUS_INTERNAL_SERVER_ERROR);
      UdmHostErrorIncrement(&Indexer->Conf->Hosts, Doc->connp.hostname);
      action= UDM_URL_ACTION_SUPDATE;
      break;
    }
    else
    {
      UdmHostErrorReset(&Indexer->Conf->Hosts, Doc->connp.hostname);

      /* Check clones */
      if(Doc->method == UDM_METHOD_GET && Doc->Spider.use_clones)
      {
        urlid_t    origin_id = 0;
        if (UDM_OK != (rc= UdmURLActionNoLock(Indexer, Doc, UDM_URL_ACTION_FINDORIG)))
          return rc;
        origin_id= (urlid_t)UdmVarListFindInt(&Doc->Sections,"Origin-ID",0);
  
        /* An origin other than this URL means the document is a clone */
        if(origin_id && origin_id != url_id)
        {
          if (UdmNeedLog(UDM_LOG_EXTRA))
            UdmLog(Indexer, UDM_LOG_EXTRA, "Duplicate Document %s with #%d", 
                   UdmVarListFindStr(&Doc->Sections, "URL", ""), origin_id);
          action= UDM_URL_ACTION_UPDCLONE;
          break;
        }
      }
  
      /* Check that document wasn't modified since last indexing */
      if ((UdmVarListFindInt(&Doc->Sections,"crc32", 0) != 0) 
          &&  (UdmVarListFindInt(&Doc->Sections,"crc32old",0)==UdmVarListFindInt(&Doc->Sections,"crc32",0)) 
          &&  (!(Indexer->flags&UDM_FLAG_REINDEX)))
      {
        action= UDM_URL_ACTION_SUPDATE;
        break;
      }
  
      /* For NEWS extension: get rec_id from my */
      /* parent out of db (if I have one...)    */
      if (use_newsext && (UDM_OK != (rc= UdmRegisterChild(Indexer, Doc))))
        return rc;

      /* NOTE(review): LUPDATE presumably stores the full document content - confirm */
      action= UDM_URL_ACTION_LUPDATE;
      break;
    }

  case UDM_HTTP_STATUS_NOT_MODIFIED:                  /* 304 */
    action= UDM_URL_ACTION_SUPDATE;
    break;
  
  case UDM_HTTP_STATUS_MULTIPLE_CHOICES:              /* 300 */
  case UDM_HTTP_STATUS_MOVED_PARMANENTLY:             /* 301 */
  case UDM_HTTP_STATUS_MOVED_TEMPORARILY:             /* 302 */
  case UDM_HTTP_STATUS_SEE_OTHER:                     /* 303 */
  case UDM_HTTP_STATUS_USE_PROXY:                     /* 305 */
  case UDM_HTTP_STATUS_BAD_REQUEST:                   /* 400 */
  case UDM_HTTP_STATUS_UNAUTHORIZED:                  /* 401 */
  case UDM_HTTP_STATUS_PAYMENT_REQUIRED:              /* 402 */
  case UDM_HTTP_STATUS_FORBIDDEN:                     /* 403 */
  case UDM_HTTP_STATUS_NOT_FOUND:                     /* 404 */
  case UDM_HTTP_STATUS_METHOD_NOT_ALLOWED:            /* 405 */
  case UDM_HTTP_STATUS_NOT_ACCEPTABLE:                /* 406 */
  case UDM_HTTP_STATUS_PROXY_AUTHORIZATION_REQUIRED:  /* 407 */
  case UDM_HTTP_STATUS_REQUEST_TIMEOUT:               /* 408 */
  case UDM_HTTP_STATUS_CONFLICT:                      /* 409 */
  case UDM_HTTP_STATUS_GONE:                          /* 410 */
  case UDM_HTTP_STATUS_LENGTH_REQUIRED:               /* 411 */
  case UDM_HTTP_STATUS_PRECONDITION_FAILED:           /* 412 */
  case UDM_HTTP_STATUS_REQUEST_ENTITY_TOO_LARGE:      /* 413 */
  case UDM_HTTP_STATUS_REQUEST_URI_TOO_LONG:          /* 414 */  
  case UDM_HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:        /* 415 */
  case UDM_HTTP_STATUS_NOT_IMPLEMENTED:               /* 501 */
  case UDM_HTTP_STATUS_BAD_GATEWAY:                   /* 502 */
  case UDM_HTTP_STATUS_NOT_SUPPORTED:                 /* 505 */  
    /*
      Remove words and outgoing links from database.
      Keep "urlinfo" and "url" values for DBMode=blob searches,
      as well as for cached search results. Information from
      these two tables will be deleted later, after "HoldBadHref"
    */
    action= UDM_URL_ACTION_DUPDATE;
    break;
  
  case UDM_HTTP_STATUS_INTERNAL_SERVER_ERROR:         /* 500 */
  case UDM_HTTP_STATUS_SERVICE_UNAVAILABLE:           /* 503 */
  case UDM_HTTP_STATUS_GATEWAY_TIMEOUT:               /* 504 */
  
    /* Keep words in database                */
    /* We'll retry later, maybe host is down */
    UdmHostErrorIncrement(&Indexer->Conf->Hosts, Doc->connp.hostname);
    next_index_time=time(NULL)+Doc->Spider.net_error_delay_time;
    action= UDM_URL_ACTION_SUPDATE;
    break;
  
  default: /* Unknown status, retry later */
    UdmLog(Indexer,UDM_LOG_WARN,"HTTP %d We don't yet know how to handle it, skipped",status);
    action= UDM_URL_ACTION_SUPDATE;
    break;
  }

update:

  /* Publish the next index time as a string, then run the chosen action */
  UdmTime_t2HttpStr(next_index_time,dbuf);
  UdmVarListReplaceStr(&Doc->Sections,"Next-Index-Time",dbuf);  
  rc= UdmURLActionNoLock(Indexer, Doc, action);
  return rc;
}


/*
  Add a document to the in-memory cache of indexed documents
  (Indexer->Indexed), or flush the cache into the database.

  Doc - document to add; NULL forces a flush of everything cached.

  An approximation of the memory used by the document is added to
  I->memused. The cache is flushed when the estimate reaches
  "DocMemCacheSize" megabytes, when 1024 documents are already cached,
  or when Doc is NULL. On flush the new document is written first
  (and freed), then all cached documents, then the cache is released.
  Otherwise the document structure is copied into the cache with
  freeme reset to 0.

  The caller must own UDM_LOCK_CONF.
  Returns UDM_OK on success, the DocUpdate() error code if a write
  failed, or UDM_ERROR if the cache could not be grown.
*/
static int UdmDocUpdate(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc)
{
  size_t    maxsize;
  size_t    sec;
  int    flush=0;
  int    rc=UDM_OK;
  UDM_RESULT  *I = &Indexer->Indexed;

  UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_CONF);
  /* Convert to size_t before multiplying: int arithmetic overflows from 2048 Mb */
  maxsize = (size_t) UdmVarListFindInt(&Indexer->Conf->Vars,"DocMemCacheSize",0) * 1024 * 1024;

  /* The counters are not of type int: print them through an explicit cast */
  if (maxsize > 0 && I->memused > 0)
    UdmLog(Indexer, UDM_LOG_EXTRA, "DocCacheSize: %lu/%lu",
           (unsigned long) I->memused, (unsigned long) maxsize);
  if (Doc)
  {
    I->memused += sizeof(UDM_DOCUMENT);
    /* Approximation for Words memory usage  */
    I->memused += Doc->Words.nwords * (sizeof(UDM_WORD) + 5);
    /* Approximation for CrossWords memory usage */
    I->memused += Doc->CrossWords.ncrosswords * (sizeof(UDM_CROSSWORD) + 35);
    /* Approximation for Sections memory usage */
    for(sec = 0; sec < Doc->Sections.nvars; sec++) {
      I->memused += sizeof(UDM_VAR);
      I->memused += Doc->Sections.Var[sec].maxlen * 3 + 10;
    }
    I->memused += (sizeof(UDM_HREF) + 35) * Doc->Hrefs.nhrefs;
    if (I->memused >= maxsize) flush = 1;
    if (I->num_rows >= 1024) flush = 1;
  } else flush = 1;

  if (flush)
  {
    size_t  docnum;

    if (I->num_rows)
      UdmLog(Indexer, UDM_LOG_EXTRA, "Flush %lu document(s)",
             (unsigned long) (I->num_rows + ((Doc != NULL) ? 1 : 0)));
    
    if (Doc)
    {
      UDM_THREADINFO(Indexer, "Updating", UdmVarListFindStr(&Doc->Sections, "URL", ""));
      if(UDM_OK != (rc = DocUpdate(Indexer, Doc))) return rc;
      UdmDocFree(Doc);
    }
    
    for (docnum = 0; docnum < I->num_rows; docnum++)
    {
      /* Flush all hrefs from cache in-memory    */
      /* cache into database. Note, this must    */
      /* be done before call of  StoreCrossWords */
      /* because we need to know all new URL IDs */
      
      UDM_THREADINFO(Indexer, "Updating", UdmVarListFindStr(&I->Doc[docnum].Sections, "URL", ""));
      if(UDM_OK != (rc = DocUpdate(Indexer, &I->Doc[docnum])))
        return rc;
    }
    
    if (Indexer->Indexed.num_rows) UdmResultFree(&Indexer->Indexed);
  }
  else
  {
    /*
      Add document into cache.
      Keep the old array reachable if the reallocation fails.
    */
    UDM_DOCUMENT *grown= (UDM_DOCUMENT*)UdmRealloc(I->Doc, (I->num_rows + 1) * sizeof(UDM_DOCUMENT));
    if (!grown)
      return UDM_ERROR;
    I->Doc= grown;
    I->Doc[I->num_rows] = Doc[0];
    I->Doc[I->num_rows].freeme = 0;
    I->num_rows++;
  }
  return rc;
}

/*
  Execute a URL action on the configured database(s).
  The caller must already own UDM_LOCK_CONF.

  A   - agent
  D   - document the action applies to (may be NULL)
  cmd - one of the UDM_URL_ACTION_xxx commands

  UDM_URL_ACTION_FLUSH is handed over to UdmDocUpdate().
  For UDM_URL_ACTION_GET_CACHED_COPY the database is taken from the
  "dbnum" section of the document. Otherwise, when a document is given,
  a single database is selected from the low 8 bits of "URL_ID"
  (or of the hash of "URL" when URL_ID is 0). Without a document
  the action is run on every database, stopping at the first failure.

  Returns UDM_OK on success; UDM_ERROR (or the driver's error code)
  otherwise, including when no database could execute the action.
*/
int UdmURLActionNoLock(UDM_AGENT *A, UDM_DOCUMENT *D, int cmd)
{
  int res=UDM_ERROR, execflag = 0;
  size_t i, dbfrom = 0, dbto;
  UDM_DB  *db;
  int dbnum = (cmd == UDM_URL_ACTION_GET_CACHED_COPY ? UdmVarListFindInt(&D->Sections, "dbnum", 0) : -1);

  UDM_LOCK_CHECK_OWNER(A, UDM_LOCK_CONF);

  if(cmd == UDM_URL_ACTION_FLUSH)
    return UdmDocUpdate(A, D);

#ifdef USE_TRACE
  fprintf(A->TR, "[%d] URLAction: %d\n", A->handle, cmd);
#endif
  
  dbto =  A->Conf->dbl.nitems;
  /*
    Pick the database by hash only when there is at least one:
    with an empty list the range must stay empty, otherwise db[0]
    would be accessed out of bounds.
  */
  if (dbnum < 0 && D != NULL && dbto > 0)
  {
    udmhash32_t url_id=UdmVarListFindInt(&D->Sections,"URL_ID", 0);
    udmhash32_t seed= (((url_id) ? url_id :
                       UdmStrHash32(UdmVarListFindStr(&D->Sections, "URL", ""))) & 0xFF);
    dbfrom= dbto= seed * A->Conf->dbl.nitems / 256;
    dbto++;
  }

  for (i = dbfrom; i < dbto; i++)
  {
    if (dbnum >= 0 && (size_t) dbnum != i) continue;
    db = &A->Conf->dbl.db[i];

    UDM_GETLOCK(A, UDM_LOCK_DB);
    switch(db->DBDriver)
    {
      case UDM_DB_SEARCHD:
        res = UdmSearchdURLAction(A, D, cmd, db);
        execflag = 1;
        break;
      
#ifdef HAVE_SQL
      default:
        res=UdmURLActionSQL(A,D,cmd,db);
        if (cmd == UDM_URL_ACTION_EXPIRE)
        {
          UDM_FREE(db->where);  /* clear db->where for next parameters */
          UDM_FREE(db->from);
        }
        execflag = 1;
        break;
#endif
    }
    
    if (res != UDM_OK && execflag)
    {
      /* Never use the error text itself as the format string */
      UdmLog (A, UDM_LOG_ERROR, "%s", db->errstr);
    }
    UDM_RELEASELOCK(A, UDM_LOCK_DB);
    if (res != UDM_OK) break;
  }
  
  if ((res != UDM_OK) && !execflag)
  {
    UdmLog(A, UDM_LOG_ERROR, "no supported DBAddr specified");
  }
  return res;
}


/*
  Public wrapper around UdmURLActionNoLock(): takes UDM_LOCK_CONF,
  runs the action and releases the lock again.
  Returns whatever UdmURLActionNoLock() returned.
*/
__C_LINK int __UDMCALL UdmURLAction(UDM_AGENT *A, UDM_DOCUMENT *D, int cmd)
{
  int status;

  UDM_GETLOCK(A, UDM_LOCK_CONF);
  status= UdmURLActionNoLock(A, D, cmd);
  UDM_RELEASELOCK(A, UDM_LOCK_CONF);

  return status;
}



/*
  Rebuild the list of indexing targets (A->Conf->Targets).

  The current target list is released, then UdmTargetsSQL() is called
  for every configured database (SQL builds only), each under
  UDM_LOCK_DB. Processing stops at the first database that fails;
  its error text is logged.

  The caller must own UDM_LOCK_CONF.
  Returns UDM_OK on success. UDM_ERROR is also returned when
  no database is configured.
*/
__C_LINK int __UDMCALL UdmTargets(UDM_AGENT *A)
{
  int  res=UDM_ERROR;
  UDM_DB  *db;
  size_t i, dbfrom = 0, dbto;

  UDM_LOCK_CHECK_OWNER(A, UDM_LOCK_CONF);
  dbto =  A->Conf->dbl.nitems;
  UdmResultFree(&A->Conf->Targets);

  for (i = dbfrom; i < dbto; i++)
  {
    db = &A->Conf->dbl.db[i];
    UDM_GETLOCK(A, UDM_LOCK_DB);
#ifdef HAVE_SQL
    res = UdmTargetsSQL(A, db);
#endif
    if(res != UDM_OK)
    {
      /* Never use the error text itself as the format string */
      UdmLog(A, UDM_LOG_ERROR, "%s", db->errstr);
    }
    UDM_RELEASELOCK(A, UDM_LOCK_DB);
    if (res != UDM_OK) break;
  }
  return res;
}

/*
  Execute a result action on every configured database.

  A   - agent
  R   - search result the action applies to
  cmd - action code, passed through to UdmResActionSQL()

  searchd databases are skipped (treated as success); all other
  databases are handled by UdmResActionSQL() in SQL builds.
  Each database is processed under UDM_LOCK_DB and processing stops
  at the first failure, whose error text is logged.

  Returns UDM_OK on success. UDM_ERROR is also returned when
  no database is configured.
*/
__C_LINK int __UDMCALL UdmResAction(UDM_AGENT *A, UDM_RESULT *R, int cmd)
{
  int  rc= UDM_ERROR;
  UDM_DB  *db;
  size_t i, dbfrom = 0, dbto;
  
  dbto =  A->Conf->dbl.nitems;

  for (i = dbfrom; i < dbto; i++)
  {
    db = &A->Conf->dbl.db[i];
    UDM_GETLOCK(A, UDM_LOCK_DB);
    switch(db->DBDriver)
    {
      case UDM_DB_SEARCHD:
        rc= UDM_OK;
        break;

#ifdef HAVE_SQL
      default:
        rc= UdmResActionSQL(A, R, cmd, db, i);
        break;
#endif
    }
    if(rc != UDM_OK)
    {
      /* Never use the error text itself as the format string */
      UdmLog(A, UDM_LOG_ERROR, "%s", db->errstr);
    }
    UDM_RELEASELOCK(A, UDM_LOCK_DB);
    if (rc != UDM_OK) break;
  }
  return rc;
}


/*
  Execute a category action on every configured database.

  A   - agent
  C   - category structure passed through to the driver
  cmd - action code passed through to the driver

  The number of databases is read under UDM_LOCK_CONF. Each database
  is then processed under UDM_LOCK_DB: searchd databases through
  UdmSearchdCatAction(), all others through UdmCatActionSQL() in SQL
  builds. Processing stops at the first failure, whose error text
  is logged.

  Returns UDM_OK on success. UDM_ERROR is also returned when
  no database is configured.
*/
__C_LINK int __UDMCALL UdmCatAction(UDM_AGENT *A, UDM_CATEGORY *C, int cmd)
{
  UDM_DB  *db;
  int  res=UDM_ERROR;
  size_t i, dbfrom = 0, dbto;

  UDM_GETLOCK(A, UDM_LOCK_CONF);
  dbto =  A->Conf->dbl.nitems;
  UDM_RELEASELOCK(A, UDM_LOCK_CONF);

  for (i = dbfrom; i < dbto; i++)
  {
    db = &A->Conf->dbl.db[i];
    UDM_GETLOCK(A, UDM_LOCK_DB);
    switch(db->DBDriver)
    {
      case UDM_DB_SEARCHD:
        res = UdmSearchdCatAction(A, C, cmd, db);
        break;
#ifdef HAVE_SQL
      default:
        res=UdmCatActionSQL(A,C,cmd,db);
        break;
#endif
    }
    if(res != UDM_OK)
    {
      /* Never use the error text itself as the format string */
      UdmLog(A, UDM_LOG_ERROR, "%s", db->errstr);
    }
    UDM_RELEASELOCK(A, UDM_LOCK_DB);
    if (res != UDM_OK) break;
  }
  return res;
}

/*
  Execute a server list action on every configured database.

  A   - agent
  S   - server list passed through to UdmSrvActionSQL()
  cmd - action code passed through to UdmSrvActionSQL()

  UDM_LOCK_CONF is held for the whole call. Each database is processed
  under UDM_LOCK_DB (SQL builds only) and processing stops at the first
  failure, whose error text is logged.

  Returns UDM_OK on success. UDM_ERROR is also returned when
  no database is configured.
*/
__C_LINK int __UDMCALL UdmSrvAction(UDM_AGENT *A, UDM_SERVERLIST *S, int cmd)
{
  UDM_DB  *db;
  int  res=UDM_ERROR;
  size_t i, dbfrom = 0, dbto;
  
  UDM_GETLOCK(A, UDM_LOCK_CONF);
  dbto =  A->Conf->dbl.nitems;

  for (i = dbfrom; i < dbto; i++)
  {
    db = &A->Conf->dbl.db[i];

    UDM_GETLOCK(A, UDM_LOCK_DB); 
#ifdef HAVE_SQL
    res = UdmSrvActionSQL(A, S, cmd, db);
#endif
    if(res != UDM_OK)
    {
      /* Never use the error text itself as the format string */
      UdmLog(A, UDM_LOG_ERROR, "%s", db->errstr);
    }
    UDM_RELEASELOCK(A, UDM_LOCK_DB);
    if (res != UDM_OK) break;
  }
  UDM_RELEASELOCK(A, UDM_LOCK_CONF);
  return res;
}

/*
  Search cache size setting. In the code visible here it is only
  compared against -1 by UdmFindWords() before results are stored
  into the search cache, so with the value 1000 storing is always allowed.
*/
static const int search_cache_size=1000;

/*
  Allocate an array for documents information.

  When Res->Doc is not allocated yet and Res->num_rows is positive,
  an array of Res->num_rows documents is allocated and every element
  is initialized with UdmDocInit(). Otherwise nothing is done.

  Returns UDM_OK on success, UDM_ERROR if the allocation failed
  (Res->Doc is NULL in that case).
*/
static int
UdmResAllocDoc(UDM_RESULT *Res)
{
  size_t i;
  if (!Res->Doc && Res->num_rows > 0)
  {
    Res->Doc = (UDM_DOCUMENT*)UdmMalloc(sizeof(UDM_DOCUMENT) * (Res->num_rows));
    /* Do not initialize elements of an array that does not exist */
    if (!Res->Doc)
      return UDM_ERROR;
    for (i= 0; i < Res->num_rows; i++)
      UdmDocInit(&Res->Doc[i]);
  }
  return UDM_OK;
}



/*
  Run the word search against a single database.

  searchd databases are queried through UdmFindWordsSearchd().
  For all other databases (SQL builds only) the query cache is
  consulted first; only when it delivered no items the real search
  is performed, and a non-empty successful result is then put
  into the query cache.

  Returns UDM_OK on success or the error code of the failed step.
*/
static int
UdmFindWordsDB(UDM_AGENT *A, UDM_RESULT *CurRes, UDM_DB *db,
               size_t num_best_rows)
{
  int status= UDM_OK;

  UdmLog(A, UDM_LOG_DEBUG, "UdmFind for %s",
         UdmVarListFindStr(&db->Vars, "DBAddr", "<noaddr>"));

  if (db->DBDriver == UDM_DB_SEARCHD)
    return UdmFindWordsSearchd(A, CurRes, db);

#ifdef HAVE_SQL
  status= UdmQueryCacheGetSQL(A, CurRes, db);
  if (status != UDM_OK)
    return status;

  /* The cache already delivered the items */
  if (CurRes->URLData.nitems)
    return status;

  status= UdmFindWordsSQL(A, CurRes, db, num_best_rows);
  if (status == UDM_OK && CurRes->URLData.nitems)
    status= UdmQueryCachePutSQL(A, CurRes, db);
#endif

  return status;
}

/*
  Parameters and result of one per-database search job,
  handed to SearchThread() as its argument.
*/
typedef struct search_param_st
{
  UDM_AGENT *Agent;     /* agent the search is run with                  */
  UDM_RESULT *Res;      /* result structure the search fills             */
  UDM_DB *db;           /* database to search in                         */
  int rc;               /* return code of the search, set by the thread  */
  void *thd;            /* thread handle as set by the ThreadCreate hook */
  size_t num_best_rows; /* passed through to UdmFindWordsDB()            */
} UDM_SPRM;


/*
  Thread entry point: performs the search described by the UDM_SPRM
  passed in arg and stores the return code into its rc member.
  Always returns NULL.
*/
static
void *SearchThread(void *arg)
{
  UDM_SPRM *job= (UDM_SPRM*) arg;

  job->rc= UdmFindWordsDB(job->Agent, job->Res, job->db, job->num_best_rows);

  return NULL;
}


/*
  Tag every item of the list with the number of the database
  it came from: coord is shifted left by 8 bits and
  (255 - (num & 255)) is added into the freed low byte.
  The inverted number is used to sort a document from the first
  database before the same document from the second database.
  Always returns UDM_OK.
*/
static int
UdmResultSetMachineNumber(UDM_URLDATALIST *List, size_t num)
{
  size_t       remaining= List->nitems;
  UDM_URLDATA *cur= List->Item;

  for ( ; remaining > 0; remaining--, cur++)
  {
    cur->coord= (cur->coord << 8) + (255 - (num & 255));
  }
  return UDM_OK;
}


/*
  Join the results found in several databases into one result.

  A        - agent (configuration variables and logging)
  TmpRes   - array of per-database results
  nresults - number of elements in TmpRes
  Res      - joined result

  total_found, num_rows and the number of URL data items are summed up,
  word statistics are merged, and all URL data items are copied into
  one array after being tagged with the number of their database.
  The per-database item arrays are freed. With more than one database
  the joined items are optionally grouped by site ("GroupBySite" set
  and "site" equal to 0) and sorted by the "s" pattern.

  Returns UDM_OK on success. If the joined array cannot be allocated,
  the per-database item arrays are freed, Res is left with zero items
  and UDM_ERROR is returned.
*/
static int
UdmResultJoin(UDM_AGENT *A,
              UDM_RESULT *TmpRes, size_t nresults,
              UDM_RESULT *Res)
{
  size_t i;
  unsigned long ticks;
  
  for (Res->URLData.nitems= 0,
       Res->num_rows= 0,
       Res->total_found= 0, i= 0; i < nresults; i++)
  {
    size_t j;
    Res->total_found+= TmpRes[i].total_found;
    Res->num_rows+= TmpRes[i].num_rows;
    Res->URLData.nitems+= TmpRes[i].URLData.nitems;
    for (j= 0; j < TmpRes[i].WWList.nwords; j++)
      UdmWideWordListAddForStat(&Res->WWList, &TmpRes[i].WWList.Word[j]);
  }

  ticks= UdmStartTimer();
  /* nresults is a size_t: print it through an explicit cast */
  UdmLog(A, UDM_LOG_DEBUG, "Start joining results from %d dbs", (int) nresults);

  if (Res->URLData.nitems > 0)
  {
    size_t offs;

    Res->URLData.Item= (UDM_URLDATA*)UdmMalloc(sizeof(UDM_URLDATA)
                                               * Res->URLData.nitems);
    if (!Res->URLData.Item)
    {
      /* Leave an empty but consistent result behind */
      for (i= 0; i < nresults; i++)
        UDM_FREE(TmpRes[i].URLData.Item);
      Res->URLData.nitems= 0;
      Res->num_rows= 0;
      Res->total_found= 0;
      UdmLog(A, UDM_LOG_ERROR, "Can't allocate memory for joined results");
      return UDM_ERROR;
    }

    for (offs= 0, i= 0; i < nresults; offs+= TmpRes[i].URLData.nitems, i++)
    {
      UDM_RESULT  *CurRes= &TmpRes[i];
      UDM_URLDATA *CurData= TmpRes[i].URLData.Item;
      size_t ncoords= CurRes->URLData.nitems;
      if(CurData)
      {
        UdmResultSetMachineNumber(&CurRes->URLData, i);
        memcpy(Res->URLData.Item + offs, CurData,
               sizeof(CurData[0]) * ncoords);
      }
      UDM_FREE(CurRes->URLData.Item);
    }
  }

  if (nresults > 1)
  {
    int use_site_id= UdmVarListFindBool(&A->Conf->Vars, "GroupBySite", 0) &&
                     (UdmVarListFindInt(&A->Conf->Vars, "site", 0) == 0);
    if (use_site_id)
    {
      UdmURLDataSortBySite(&Res->URLData);
      UdmURLDataGroupBySite(&Res->URLData);
      /*
        TODO: add better approximation of total results
        found with cluster. This number will be between
        the largest per-database total_found and
        Res->total_found, and will depend on overlap
        between the sites:
        (Res->URLData.nitems / Res->total_found)
      */
      Res->total_found= Res->URLData.nitems;
    }
    if (Res->URLData.nitems)
      UdmURLDataSortByPattern(&Res->URLData,
                              UdmVarListFindStr(&A->Conf->Vars, "s", "R"));
  }

  ticks= UdmStartTimer() - ticks;
  UdmLog(A, UDM_LOG_DEBUG, "Stop  joining results:\t%.2f", (float)ticks / 1000);

  return UDM_OK;
}


/*
  Run the word search against all configured databases
  and join the results.

  A             - agent
  TmpRes        - array with one result structure per database
  Res           - prepared query; receives the joined result
  num_best_rows - passed through to UdmFindWordsDB()

  Threads are used only when a ThreadCreate hook is installed, there
  is more than one database, every database is a searchd database and
  the number of databases fits into the local parameter array.
  Otherwise the databases are searched one after another and the first
  failure is returned immediately.

  NOTE(review): the return codes collected by the search threads
  (search_param[i].rc) are not examined, so a failing node is skipped
  silently - confirm that this best-effort behaviour is intended.

  Returns UDM_OK on success, the error code of a failed sequential
  search, or the result of UdmResultJoin().
*/
static int
UdmFindWordsMulDB(UDM_AGENT *A, UDM_RESULT *TmpRes,
                  UDM_RESULT *Res, size_t num_best_rows)
{
  size_t i, ndatabases= A->Conf->dbl.nitems;
  UDM_SPRM search_param[256];
  const size_t max_threads= sizeof(search_param) / sizeof(search_param[0]);
  /* Never start more threads than search_param[] has slots for */
  int use_threads= A->Conf->ThreadCreate != NULL &&
                   ndatabases > 1 && ndatabases <= max_threads;
  int rc= UDM_OK;
  
  /* Check if all drivers are thread safe */
  for (i= 0; i < ndatabases; i++)
  {
    UDM_DB *db = &A->Conf->dbl.db[i];
    /* TODO: link against libmysqlclient_r */
    if (db->DBDriver != UDM_DB_SEARCHD)
    {
      use_threads= 0;
      break;
    }
  }
    
  for (i= 0; i < ndatabases; i++)
  {
    UDM_RESULT *CurRes= &TmpRes[i];
    UDM_DB *db = &A->Conf->dbl.db[i];
    CurRes[0]= Res[0];    /* Copy bool items */
    UdmWideWordListCopy(&CurRes->WWList, &Res->WWList);

    if (use_threads)
    {
      void *thd;
      /* i is a size_t: print it through an explicit cast */
      UdmLog(A, UDM_LOG_DEBUG, "Starting thread[%d]", (int) (i + 1));
      search_param[i].Agent= UdmAgentInit(NULL, A->Conf, i + 1);
      search_param[i].Res= CurRes;
      search_param[i].db= db;
      search_param[i].num_best_rows= num_best_rows;
      A->Conf->ThreadCreate(&thd, SearchThread, (void*) &search_param[i]);
      search_param[i].thd= thd;
    }
    else
    {
      if (UDM_OK != (rc= UdmFindWordsDB(A, CurRes, db, num_best_rows)))
        return rc;
    }
  }
    
  if (use_threads)
  {
    unsigned long long ticks= UdmStartTimer();
    UdmLog(A, UDM_LOG_DEBUG, "Creating threads");
    for (i= 0; i < ndatabases; i++)
    {
      void *thd= search_param[i].thd;
      UdmLog(A, UDM_LOG_DEBUG, "Joining thread[%d]", (int) (i + 1));
      A->Conf->ThreadJoin(thd);
    }
    ticks= UdmStartTimer() - ticks;
    UdmLog(A, UDM_LOG_DEBUG, "Threads finished: %.2f", (float) ticks/1000);
  }
  
  if (use_threads)
  {
    for (i= 0; i < ndatabases; i++)
      UdmAgentFree(search_param[i].Agent);
  }

  /* rc is UDM_OK here: the outcome of the join is the final result */
  rc= UdmResultJoin(A, TmpRes, ndatabases, Res);
  
  return rc;
}


/*
  Find the documents matching the prepared query in Res.

  With "Cache" set to "yes" the search cache is consulted first and,
  when UdmSearchCacheFind() returns zero, nothing else is done.
  Otherwise the search is run on one or on all configured databases,
  the number of items is cut down to "ResultsLimit" (if set), and the
  result is stored into the search cache when caching is enabled.

  Returns UDM_OK on success or the error code of the failed search.
*/
static int
UdmFindWords(UDM_AGENT *A, UDM_RESULT *TmpRes,
             UDM_RESULT *Res, size_t num_best_rows)
{
  const char *cache_mode= UdmVarListFindStr(&A->Conf->Vars, "Cache", "no");
  int         status= UDM_OK;
  size_t      limit= UdmVarListFindUnsigned(&A->Conf->Vars, "ResultsLimit", 0);
  int         cache_enabled= !strcasecmp(cache_mode, "yes");

  /* Nothing more to do when the cache lookup returned zero */
  if (cache_enabled && !UdmSearchCacheFind(A, Res))
    return status;

  if (A->Conf->dbl.nitems > 1)
  {
    status= UdmFindWordsMulDB(A, TmpRes, Res, num_best_rows);
    if (status != UDM_OK)
      return status;
  }
  else
  {
    status= UdmFindWordsDB(A, Res, &A->Conf->dbl.db[0], num_best_rows);
    if (status != UDM_OK)
      return status;
    UdmResultSetMachineNumber(&Res->URLData, 0);
  }

  if (limit > 0 && limit < Res->URLData.nitems)
    Res->URLData.nitems= limit;

  if (cache_enabled && search_cache_size > -1)
  {
    fflush(stdout);
    fflush(stderr);
    UdmSearchCacheStore(A, Res);
  }

  return status;
}


/*
  Create the "Alias" section of a document.

  A copy of the "URL" section made by UdmRemoveHiLightDup() is matched
  against the aliases of the configuration. When an alias matches, it is
  applied to the URL; otherwise the URL copy itself is used.
  The outcome is stored into the "Alias" section.

  Returns UDM_OK on success, UDM_ERROR if memory could not be allocated
  (the "Alias" section is left untouched in that case).
*/
static int CreateAlias(UDM_ENV *Conf, UDM_DOCUMENT *Doc)
{
  char *alcopy, *aliastr;
  UDM_MATCH    *Alias;
  UDM_MATCH_PART Parts[10];
  /* Derive the number of parts from the array instead of repeating it */
  const size_t nparts= sizeof(Parts) / sizeof(Parts[0]);

  /* Create "Alias" variable */
  alcopy=UdmRemoveHiLightDup(UdmVarListFindStr(&Doc->Sections,"URL",""));
  if (!alcopy)
    return UDM_ERROR;

  if ((Alias=UdmMatchListFind(&Conf->Aliases,alcopy,nparts,Parts)))
  {
    size_t nbytes= strlen(Alias->arg) + strlen(alcopy) + 64;
    aliastr= (char*)UdmMalloc(nbytes);
    if (aliastr)
      UdmMatchApply(aliastr, nbytes, alcopy, Alias->arg, Alias, nparts, Parts);
  }
  else
  {
    aliastr = (char*)UdmStrdup(alcopy);
  }

  if (!aliastr)
  {
    UdmFree(alcopy);
    return UDM_ERROR;
  }

  UdmVarListReplaceStr(&Doc->Sections,"Alias", aliastr);
  UdmFree(aliastr);
  UdmFree(alcopy);
  return UDM_OK;
}


/*
  Execute the search query described by the variables in A->Conf->Vars.

  Steps: prepare the query, find the matching documents, optionally ask
  for suggestions when nothing was found ("Suggest"), select the
  requested page ("np", "ps", "offs"), load document information for
  the page from the databases, fill the "PerSite", "Score", "Order",
  "dbnum" and "Alias" sections, optionally attach clones
  ("DetectClones"), convert the result with UdmConvert(), build
  excerpts ("ExcerptSize", "ExcerptPadding"), collect word information
  and track the query.

  Returns a newly allocated result, or NULL on error.
*/
UDM_RESULT * __UDMCALL UdmFind(UDM_AGENT *A)
{
  UDM_RESULT  *Res, *TmpRes;
  int    res=UDM_OK;
  unsigned long  ticks=UdmStartTimer(), ticks_;
  size_t  i, nbytes, numdatabases=  A->Conf->dbl.nitems;
  size_t  page_number= (size_t) UdmVarListFindInt(&A->Conf->Vars, "np", 0);
  size_t  page_size=   (size_t) UdmVarListFindInt(&A->Conf->Vars, "ps", 10);
  size_t  offs=        (size_t) UdmVarListFindInt(&A->Conf->Vars, "offs", 0);
  size_t  ExcerptSize= (size_t)UdmVarListFindInt(&A->Conf->Vars, "ExcerptSize", 256);
  size_t  ExcerptPadding = (size_t)UdmVarListFindInt(&A->Conf->Vars, "ExcerptPadding", 40);
  char    str[128];

  UdmLog(A,UDM_LOG_DEBUG,"Start UdmFind");
  
  ticks_= UdmStartTimer();
  UdmLog(A, UDM_LOG_DEBUG, "Start Prepare");  
  nbytes= sizeof(UDM_RESULT) * numdatabases;
  TmpRes= (UDM_RESULT*) UdmMalloc(nbytes);
  /* A NULL pointer is an error only when memory was really requested */
  if (!TmpRes && nbytes > 0)
    return NULL;
  if (TmpRes)
    bzero((void*) TmpRes, nbytes);
  Res=UdmResultInit(NULL);
  if (!Res)
  {
    UDM_FREE(TmpRes);
    return NULL;
  }
  if (UDM_OK != (res= UdmPrepare(A, Res)))
    goto conv;
  /* Remember the original search mode in "orig_m"; "m" is restored from it below */
  UdmVarListAddStr(&A->Conf->Vars, "orig_m", UdmVarListFindStr(&A->Conf->Vars, "m", "all"));
  ticks_= UdmStartTimer() - ticks_;
  UdmLog(A, UDM_LOG_DEBUG, "Stop  Prepare:\t\t%.2f", (float)ticks_/1000);

  /* An explicit offset takes precedence over the page number */
  Res->first = offs ? offs : page_number * page_size;  

  ticks_= UdmStartTimer();
  UdmLog(A, UDM_LOG_DEBUG, "Start FindWords");  
  if (!Res->WWList.nwords ||
      UDM_OK != (res= UdmFindWords(A, TmpRes, Res, Res->first + page_size)))
    goto conv;
  ticks_= UdmStartTimer() - ticks_;
  UdmLog(A, UDM_LOG_DEBUG, "Stop  FindWords:\t\t%.2f", (float)ticks_/1000);
  
  if (!Res->total_found)
  {
    int suggest= UdmVarListFindBool(&A->Conf->Vars, "Suggest", 0);
    if (suggest)
    {
      if(UDM_OK != (res= UdmResAction(A, Res, UDM_RES_ACTION_SUGGEST)))
        goto conv;
    }
  }
  
  UdmVarListReplaceStr(&A->Conf->Vars, "m", UdmVarListFindStr(&A->Conf->Vars, "orig_m", "all"));
  UdmVarListDel(&A->Conf->Vars, "orig_m");
  UdmVarListReplaceInt(&A->Conf->Vars, "CurrentTimestamp", (int) time(0));


  if(Res->first >= Res->URLData.nitems)
  {
    Res->last= Res->first;
    Res->num_rows= 0;
    goto conv; /* jump to converting variables into BrowserCharset */
  }
  if(Res->first + page_size > Res->URLData.nitems)
  {
    Res->num_rows= Res->URLData.nitems - Res->first;
  }
  else
  {
    Res->num_rows= page_size;
  }
  Res->last= Res->first + Res->num_rows - 1;

  ticks_= UdmStartTimer();
  UdmLog(A, UDM_LOG_DEBUG, "Start AddDocInfo");

  /*
    The loops below index Res->Doc[0 .. num_rows-1]:
    give up if the array could not be allocated.
  */
  if (UDM_OK != UdmResAllocDoc(Res) || (Res->num_rows > 0 && !Res->Doc))
  {
    Res->num_rows= 0;
    res= UDM_ERROR;
    goto conv;
  }
  
  /*
    NOTE(review): res is overwritten on every iteration, so only
    the outcome of the last database is examined at the end.
  */
  for (i= 0; i < numdatabases; i++)
  {
    UDM_DB *db= &A->Conf->dbl.db[i];
    switch(db->DBDriver){
    case UDM_DB_SEARCHD:
      res = UdmResAddDocInfoSearchd(A, TmpRes, db, Res, i);
      break;
#ifdef HAVE_SQL
    default:
      res = UdmResAddDocInfoSQL(A, db, Res, i);
      break;
#endif
    }
  }

  /* Copy url_id and coord to result */
  for(i=0;i<Res->num_rows;i++)
  {
    UDM_URLDATA *Data= &Res->URLData.Item[i + Res->first];
    uint4 score= Data->coord;
    UdmVarListReplaceUnsigned(&Res->Doc[i].Sections, "PerSite", Data->per_site);
    /* The low byte of coord carries the database number, the rest is the score */
    udm_snprintf(str, sizeof(str), "%.3f%%", ((double)(score >> 8)) / 1000);
    UdmVarListReplaceStr(&Res->Doc[i].Sections, "Score", str);
    UdmVarListReplaceInt(&Res->Doc[i].Sections,"Order",(int)(i + Res->first + 1));
    UdmVarListReplaceInt(&Res->Doc[i].Sections, "dbnum", UDM_COORD2DBNUM(score));
  }
  
  
  for (i= 0; i < Res->num_rows; i++)
    CreateAlias(A->Conf, &Res->Doc[i]);
    
  ticks_= UdmStartTimer() - ticks_;
  UdmLog(A, UDM_LOG_DEBUG, "Stop  AddDocInfo:\t\t%.2f", (float)ticks_/1000);
  
  if (UdmVarListFindInt(&A->Conf->Vars, "DetectClones", 0))
  {
    size_t num=Res->num_rows;
    ticks_=UdmStartTimer();
    UdmLog(A, UDM_LOG_DEBUG, "Start Clones");
  
    for(i=0;i<num;i++)
    {
      UDM_RESULT *Cl = UdmCloneList(A, &Res->Doc[i]);
      if (Cl)
      {
        size_t c;
        UdmVarListReplaceInt(&Res->Doc[i].Sections, "nclones", Cl->num_rows);
        for (c= 0; c < Cl->num_rows; c++)
        {
          char name[32];
          /* c is a size_t: cast it for %d and bound the write */
          udm_snprintf(name, sizeof(name), "Clone%d", (int) c);
          UdmVarListReplaceLst(&Res->Doc[i].Sections,
                               &Cl->Doc[c].Sections, name, "*");
        }
        UdmResultFree(Cl);
      }
    }
    ticks_ = UdmStartTimer() - ticks_;
    UdmLog(A, UDM_LOG_DEBUG, "Stop  Clones:\t\t%.2f", (float)ticks_/1000);
  }
  
  /* first and last begins from 0, make it begin from 1 */
  Res->first++;
  Res->last++;
  
conv:

#ifndef NO_ADVANCE_CONV
  ticks_=UdmStartTimer();
  UdmLog(A, UDM_LOG_DEBUG, "Start UdmConvert");
  UdmConvert(A->Conf, Res, A->Conf->lcs, A->Conf->bcs);
  ticks_ = UdmStartTimer() - ticks_;
  UdmLog(A, UDM_LOG_DEBUG, "Stop  UdmConvert:\t\t%.2f", (float)ticks_/1000);
#endif

  ticks_=UdmStartTimer();
  UdmLog(A, UDM_LOG_DEBUG, "Start Excerpts");
  for (i= 0; i < Res->num_rows; i++)
  {
    char *Excerpt;
    if ((Excerpt= UdmExcerptDoc(A, Res, &Res->Doc[i], ExcerptSize, ExcerptPadding)))
    {
      UdmVarListReplaceStr(&Res->Doc[i].Sections, "body", Excerpt);
      UdmFree(Excerpt);
    }
  }
  ticks_ = UdmStartTimer() - ticks_;
  UdmLog(A, UDM_LOG_DEBUG, "Stop  Excerpts:\t\t%.2f", (float)ticks_/1000);

  UdmLog(A, UDM_LOG_DEBUG, "Start WordInfo");
  ticks_=UdmStartTimer();
  UdmResWordInfo(A->Conf, Res);
  ticks_= UdmStartTimer() - ticks_;
  UdmLog(A, UDM_LOG_DEBUG, "Stop  WordInfo:\t\t%.2f", (float)ticks_/1000);

  Res->work_time= ticks= UdmStartTimer() - ticks;

  UdmTrack(A, Res);
  UdmLog(A,UDM_LOG_DEBUG,"Done  UdmFind %.2f",(float)ticks/1000);

  UDM_FREE(TmpRes);
  if(res!=UDM_OK)
  {
    UdmResultFree(Res);
    Res=NULL;
  }
  return Res;
}


/*
  Convert a DBMode name into its UDM_DBMODE_xxx value.
  The comparison ignores case and looks only at the leading
  characters of str1, so any string starting with a known mode
  name is accepted. Returns -1 for an unknown mode.
*/
static int UdmStr2DBMode(const char * str1)
{
  static const struct
  {
    const char *prefix;
    size_t      length;
    int         mode;
  } known_modes[]=
  {
    { "single",  6, UDM_DBMODE_SINGLE  },
    { "multi",   5, UDM_DBMODE_MULTI   },
    { "blob",    4, UDM_DBMODE_BLOB    },
    { "rawblob", 7, UDM_DBMODE_RAWBLOB }
  };
  size_t n;

  for (n= 0; n < sizeof(known_modes) / sizeof(known_modes[0]); n++)
  {
    if (strncasecmp(str1, known_modes[n].prefix, known_modes[n].length) == 0)
      return known_modes[n].mode;
  }
  return -1;
}

/*
  Return the name of a database type (UDM_DB_xxx value),
  or "unknown_dbtype" when the type is not known.
  Both SQLite types map to the same name "sqlite".
*/
__C_LINK const char* __UDMCALL UdmDBTypeToStr(int dbtype)
{
  static const struct
  {
    int         type;
    const char *name;
  } type_names[]=
  {
    { UDM_DB_MYSQL,   "mysql"    },
    { UDM_DB_PGSQL,   "pgsql"    },
    { UDM_DB_IBASE,   "ibase"    },
    { UDM_DB_MSSQL,   "mssql"    },
    { UDM_DB_ORACLE8, "oracle"   },
    { UDM_DB_SQLITE,  "sqlite"   },
    { UDM_DB_SQLITE3, "sqlite"   },
    { UDM_DB_MIMER,   "mimer"    },
    { UDM_DB_VIRT,    "virtuoso" },
    { UDM_DB_ACCESS,  "access"   },
    { UDM_DB_DB2,     "db2"      },
    { UDM_DB_CACHE,   "cache"    },
    { UDM_DB_SYBASE,  "sybase"   }
  };
  size_t n;

  for (n= 0; n < sizeof(type_names) / sizeof(type_names[0]); n++)
  {
    if (type_names[n].type == dbtype)
      return type_names[n].name;
  }
  return "unknown_dbtype";
}


/*
  Return the name of a database mode (UDM_DBMODE_xxx value),
  or "unknown_dbmode" when the mode is not known.
*/
__C_LINK const char* __UDMCALL UdmDBModeToStr(int dbmode)
{
  if (dbmode == UDM_DBMODE_SINGLE)
    return "single";
  if (dbmode == UDM_DBMODE_MULTI)
    return "multi";
  if (dbmode == UDM_DBMODE_BLOB)
    return "blob";
  if (dbmode == UDM_DBMODE_RAWBLOB)
    return "rawblob";
  return "unknown_dbmode";
}


/*
  Split a "name=value&name=value..." string on '&' and store every pair
  in db->Vars. A token without '=' is stored with an empty value.
  The string in "param" is modified in place (separators are overwritten
  with NUL bytes).
  Always returns UDM_OK.
*/
static int UdmDBSetParam(UDM_DB *db, char *param)
{
  char *state;
  char *pair= udm_strtok_r(param, "&", &state);

  while (pair)
  {
    const char *value= "";
    char       *eq= strchr(pair, '=');

    if (eq)
    {
      /* Cut the token at '=': name stays in "pair", value follows it */
      *eq= '\0';
      value= eq + 1;
    }
    UdmVarListReplaceStr(&db->Vars, pair, value);
    pair= udm_strtok_r(NULL, "&", &state);
  }
  return UDM_OK;
}


/*
  One entry of the SQLDriver[] table: describes a supported SQL back-end.
  UdmDBSetAddr() looks an entry up by DBAddr schema name and copies
  DBType, DBDriver, DBSQL_IN, flags and handler into the UDM_DB structure.
*/
typedef struct udm_sqldb_driver_st
{
  const char *name;            /* Schema name, compared case-insensitively */
  int DBType;                  /* UDM_DB_* type, copied to db->DBType      */
  int DBDriver;                /* UDM_DB_* driver, copied to db->DBDriver  */
  int DBSQL_IN;                /* Copied to db->DBSQL_IN (0 or 1 in table) */
  int flags;                   /* Bit mask of UDM_SQL_HAVE_* flags         */
  UDM_SQLDB_HANDLER *handler;  /* Driver handler, copied to db->sql        */
} UDM_SQLDB_DRIVER;


/*
  Table of SQL drivers compiled into this build.
  Each group of entries is present only when the corresponding HAVE_*
  macro is set. Entry layout: name, DBType, DBDriver, DBSQL_IN, flags,
  handler. The table is terminated by an entry whose name is NULL.
*/
static UDM_SQLDB_DRIVER SQLDriver[]=
{
#if (HAVE_ORACLE8)
  {
    "oracle8", UDM_DB_ORACLE8, UDM_DB_ORACLE8, 1, 
    UDM_SQL_HAVE_GROUPBY   | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_ROWNUM,
    &udm_sqldb_oracle_handler
  },
  {
    "oracle", UDM_DB_ORACLE8, UDM_DB_ORACLE8, 1, 
    UDM_SQL_HAVE_GROUPBY   | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_ROWNUM,
    &udm_sqldb_oracle_handler
  },
#endif
#if (HAVE_CTLIB)
  {
    "mssql", UDM_DB_MSSQL, UDM_DB_MSSQL, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_TOP     | UDM_SQL_HAVE_0xHEX,
    &udm_sqldb_ctlib_handler
  },
  {
    /* Sybase has its own DBType but uses UDM_DB_MSSQL as DBDriver */
    "sybase", UDM_DB_SYBASE, UDM_DB_MSSQL, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_TOP     | UDM_SQL_HAVE_0xHEX,
    &udm_sqldb_ctlib_handler
  },
#endif
#if (HAVE_MYSQL)
  { 
    "mysql", UDM_DB_MYSQL, UDM_DB_MYSQL, 1,
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_0xHEX,
    &udm_sqldb_mysql_handler
  },
#endif
#if (HAVE_PGSQL)
  {
    "pgsql", UDM_DB_PGSQL, UDM_DB_PGSQL, 1,
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT,
    &udm_sqldb_pgsql_handler,
  },
#endif
#if (HAVE_IBASE)
  {
    "ibase", UDM_DB_IBASE, UDM_DB_IBASE, 0,
    /*
    When indexing large sites, InterBase fails if an SQL IN list
    contains more than 1500 items, so DBSQL_IN is 0 for this driver.
    It would be better to fix the code to avoid big IN lists
    instead of hiding DBSQL_IN.
    */
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND,
    &udm_sqldb_ibase_handler,
  },
#endif
#if (HAVE_SQLITE)
  {
    "sqlite",
    UDM_DB_SQLITE, UDM_DB_SQLITE, 1,
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY,
    &udm_sqldb_sqlite_handler,
  },
#endif
#if (HAVE_SQLITE3)
  {
    "sqlite3",
    UDM_DB_SQLITE3, UDM_DB_SQLITE3, 1,
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_STDHEX,
    &udm_sqldb_sqlite3_handler,
  },
#endif
#if (HAVE_ODBC)
  /*
    ODBC entries: DBType names the database behind the data source,
    while DBDriver is always UDM_DB_ODBC.
  */
  {
    "odbc-solid", UDM_DB_SOLID, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-sapdb", UDM_DB_SAPDB, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND,
    &udm_sqldb_odbc_handler,
  },
  {
    /* NOTE(review): UDM_SQL_HAVE_BIND is OR-ed twice below; harmless */
    "odbc-db2", UDM_DB_DB2, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND |
    UDM_SQL_HAVE_BIND    | UDM_SQL_HAVE_STDHEX,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-access", UDM_DB_ACCESS, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_0xHEX,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-mimer", UDM_DB_MIMER, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-cache", UDM_DB_CACHE, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND,
    &udm_sqldb_odbc_handler,
  },
  { 
    "odbc-virtuoso", UDM_DB_VIRT, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-oracle", UDM_DB_ORACLE8, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY   | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_ROWNUM,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-oracle8", UDM_DB_ORACLE8, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY   | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_ROWNUM,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-mssql", UDM_DB_MSSQL, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_TOP     | UDM_SQL_HAVE_0xHEX,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-sybase", UDM_DB_SYBASE, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_TOP     | UDM_SQL_HAVE_0xHEX,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-mysql", UDM_DB_MYSQL, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_0xHEX,
    &udm_sqldb_odbc_handler,
  },
  {
    /* Bind does not seem to work with BYTEA in Windows */
    "odbc-pgsql", UDM_DB_PGSQL, UDM_DB_ODBC, 1,
    UDM_SQL_HAVE_LIMIT      | UDM_SQL_HAVE_GROUPBY |
    UDM_SQL_HAVE_SUBSELECT  /*| UDM_SQL_HAVE_BIND*/,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-ibase", UDM_DB_IBASE, UDM_DB_ODBC, 0,
    /*
    When indexing large sites, InterBase fails if an SQL IN list
    contains more than 1500 items, so DBSQL_IN is 0 for this driver.
    It would be better to fix the code to avoid big IN lists
    instead of hiding DBSQL_IN.
    */
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND,
    &udm_sqldb_odbc_handler,
  },
#endif
  {
    /* Terminator: a NULL name ends the scan in UdmSQLDriverByName() */
    NULL, 0, 0, 0, 0, NULL
  }
};


/*
  Find a driver in SQLDriver[] by its schema name (case-insensitive).
  An entry matches when its name equals "name", or when its name starts
  with "odbc-" and the remainder equals "name"; this lets e.g. "db2"
  select "odbc-db2" if no earlier entry matches.
  The first matching entry in table order wins.
  Returns NULL when nothing matches.
*/
static UDM_SQLDB_DRIVER *UdmSQLDriverByName(const char *name)
{
  size_t i;

  for (i= 0; SQLDriver[i].name != NULL; i++)
  {
    const char *drvname= SQLDriver[i].name;
    int is_exact= (strcasecmp(name, drvname) == 0);
    int is_odbc_alias= (strncasecmp(drvname, "odbc-", 5) == 0 &&
                        strcasecmp(drvname + 5, name) == 0);

    if (is_exact || is_odbc_alias)
      return &SQLDriver[i];
  }
  return NULL;
}


/*
  Parse a DBAddr string and initialize "db" from it.

  The address is parsed as a URL:
    schema://user:pass@host:port/dbname/?param1=value1&param2=value2

  - Previous Vars, DBName, where and from of "db" are released first and
    DBMode is reset to UDM_DBMODE_BLOB.
  - user/password (unescaped), host, port, the file name part and every
    "name=value" parameter are stored in db->Vars.
  - Schemas "searchd", "http" and "file" select UDM_DB_SEARCHD; any other
    schema must match an entry of SQLDriver[].
  - The parameters "numtables", "dbmode", "dbmodesearch" and "debugsql"
    are interpreted here; "dbmodesearch" is only validated.

  The "mode" argument is not used by this function.

  Returns UDM_OK on success, UDM_ERROR on a malformed address, unknown
  schema, unknown/unsupported mode or memory allocation failure.
  The parsed URL is released on every exit path.
*/
int UdmDBSetAddr(UDM_DB *db, const char *dbaddr, int mode)
{
  UDM_URL    addr;
  char     *s;
  const char *v;
  int        rc= UDM_OK;
  
  UdmVarListFree(&db->Vars);
  UDM_FREE(db->DBName);
  UDM_FREE(db->where);
  UDM_FREE(db->from);
  db->DBMode= UDM_DBMODE_BLOB;
  
  UdmVarListReplaceStr(&db->Vars, "DBAddr", dbaddr);
  
  UdmURLInit(&addr);
  
  if((!dbaddr) || UdmURLParse(&addr, dbaddr) || (!addr.schema))
  {
    /* TODO: add better error message here */
    rc= UDM_ERROR;
    goto ret;
  }
  
  if (addr.auth)
  {
    /*
      Unescape user and password to allow URL specific
      characters like '"<>@#? to be used as user or password part.
      
      It's safe to spoil addr.auth here, as we don't
      need it anymore after setting DBUser and DBPass
    */
    
    if ((s= strchr(addr.auth,':')))
    {
      *s++= 0;
      UdmUnescapeCGIQuery(s, s);
      UdmVarListReplaceStr(&db->Vars, "DBPass", s);
    }
    UdmUnescapeCGIQuery(addr.auth, addr.auth);
    UdmVarListReplaceStr(&db->Vars, "DBUser", addr.auth);
  }
  
  UdmVarListReplaceStr(&db->Vars, "DBHost", addr.hostname);
  if (addr.port)
    UdmVarListReplaceInt(&db->Vars, "DBPort", addr.port);
  
  /* Everything after '?' in the file name is a parameter list */
  if((s = strchr(UDM_NULL2EMPTY(addr.filename), '?')))
  {
    *s++='\0';
    if (UDM_OK != UdmDBSetParam(db, s))
    {
      rc= UDM_ERROR;
      goto ret;
    }
  }
  UdmVarListReplaceStr(&db->Vars, "filename", addr.filename);
  
  if(!strcasecmp(addr.schema, "searchd") ||
     !strcasecmp(addr.schema, "http") ||
     !strcasecmp(addr.schema, "file"))
  {
    db->DBType=UDM_DB_SEARCHD;
    db->DBDriver=UDM_DB_SEARCHD;
  }
  else
  {
    UDM_SQLDB_DRIVER *drv= UdmSQLDriverByName(addr.schema);
    if (!drv)
    {
      rc= UDM_ERROR;
      goto ret;
    }
    
    db->DBType= drv->DBType;
    db->DBDriver= drv->DBDriver;
    db->DBSQL_IN= drv->DBSQL_IN;
    db->flags= drv->flags;
    db->sql= drv->handler;
  }
  
  if((v= UdmVarListFindStr(&db->Vars,"numtables",NULL)))
  {
    db->numtables= atoi(v);
    if(!db->numtables)
      db->numtables=1;
  }

  if((v= UdmVarListFindStr(&db->Vars,"dbmode",NULL)))
  {
    if ((db->DBMode=UdmStr2DBMode(v)) < 0)
    {
      /* Leave through "ret" so that the parsed URL is released */
      rc= UDM_ERROR;
      goto ret;
    }
  }
  
  if((v= UdmVarListFindStr(&db->Vars,"dbmodesearch",NULL)))
  {
    int DBMode;
    if ((DBMode=UdmStr2DBMode(v)) < 0)
    {
      rc= UDM_ERROR;
      goto ret;
    }
    /* "blob" search mode is accepted only for the database types below */
    if (DBMode == UDM_DBMODE_BLOB  &&
        db->DBType != UDM_DB_MYSQL &&
        db->DBType != UDM_DB_SYBASE &&
        db->DBType != UDM_DB_MSSQL &&
        db->DBType != UDM_DB_MIMER &&
        db->DBType != UDM_DB_ORACLE8 &&
        db->DBType != UDM_DB_DB2 &&
        db->DBType != UDM_DB_PGSQL &&
        db->DBType != UDM_DB_IBASE &&
        db->DBType != UDM_DB_SQLITE3)
    {
      rc= UDM_ERROR;
      goto ret;
    }
  }

  if((v= UdmVarListFindStr(&db->Vars, "debugsql", "no")))
  {
    if (!strcasecmp(v, "yes")) 
      db->flags |= UDM_SQL_DEBUG_QUERY;
  }

  if(db->DBDriver == UDM_DB_IBASE ||
     db->DBDriver == UDM_DB_SQLITE ||
     db->DBDriver == UDM_DB_SQLITE3)
  {
    /*
      Ibase is a special case:
      It's database name consists of
      full path and file name        
    */
    db->DBName = (char*)UdmStrdup(UDM_NULL2EMPTY(addr.path));
  }
  else
  {
    /*
      ODBC Data Source Names may contain space and
      other tricky characters, let's unescape them.
    */
    size_t len= strlen(UDM_NULL2EMPTY(addr.path));
    char  *src= (char*)UdmMalloc(len+1);
    if (!src)
    {
      rc= UDM_ERROR;
      goto ret;
    }
    src[0]= '\0';
    /* Take the first path component: text between the leading '/' and the next '/' */
    sscanf(UDM_NULL2EMPTY(addr.path), "/%[^/]s", src);
    db->DBName= (char*)UdmMalloc(len+1);
    if (!db->DBName)
    {
      UdmFree(src);
      rc= UDM_ERROR;
      goto ret;
    }
    UdmUnescapeCGIQuery(db->DBName, src);
    UdmFree(src);
  }
ret:
  UdmURLFree(&addr);
  return rc;
}


/*
  Reset the statistics list "S" and call UdmStatActionSQL() for every
  database in A->Conf->dbl (only when built with HAVE_SQL).
  The configuration lock is held for the whole call; the DB lock is
  taken around each per-database call.
  Stops at the first database that does not return UDM_OK, copying its
  error text into A->Conf->errstr and clearing its errcode.
  Returns the last status; the initial value is UDM_ERROR, which is what
  is returned when no database call has succeeded.
*/
__C_LINK int __UDMCALL UdmStatAction(UDM_AGENT *A, UDM_STATLIST *S)
{
  int    rc= UDM_ERROR;
  size_t n, ndbs;

  UDM_GETLOCK(A, UDM_LOCK_CONF);
  ndbs= A->Conf->dbl.nitems;
  S->nstats= 0;
  S->Stat= NULL;

  for (n= 0; n < ndbs; n++)
  {
    UDM_DB *db= &A->Conf->dbl.db[n];
#ifdef HAVE_SQL
    UDM_GETLOCK(A, UDM_LOCK_DB);
    rc= UdmStatActionSQL(A, S, db);
    UDM_RELEASELOCK(A, UDM_LOCK_DB);
#endif
    if (rc == UDM_OK)
      continue;

    /* Failure: propagate the error text and stop */
    strcpy(A->Conf->errstr, db->errstr);
    db->errcode= 0;
    break;
  }
  UDM_RELEASELOCK(A, UDM_LOCK_CONF);
  return rc;
}

/*
  Ask each database in Conf->dbl, in order, for the id of "category"
  using UdmGetCategoryIdSQL() (only when built with HAVE_SQL).
  Returns the first non-zero id obtained, or 0 if every database
  returned 0 (or there are no databases).
*/
unsigned int UdmGetCategoryId(UDM_ENV *Conf, char *category)
{
  unsigned int id= 0;
  size_t       n;
  size_t       ndbs= Conf->dbl.nitems;

  for (n= 0; n < ndbs; n++)
  {
    UDM_DB *db= &Conf->dbl.db[n];
#ifdef HAVE_SQL
    id= UdmGetCategoryIdSQL(Conf, category, db);
    if (id != 0)
      break;
#endif
  }
  return id;
}


/*
  Call UdmTrackSQL() for every database that has a "trackquery"
  variable set in its Vars (only when built with HAVE_SQL).
  Before that, the value of the REMOTE_ADDR environment variable
  (or an empty string when it is not set) is added to the
  environment variables as "IP".
  Returns the status of the last UdmTrackSQL() call, or UDM_OK when
  none was made.
*/
int UdmTrack(UDM_AGENT * query, UDM_RESULT *Res)
{
  int rc = UDM_OK;
#ifdef HAVE_SQL
  size_t      n;
  size_t      ndbs= query->Conf->dbl.nitems;
  const char *remote_addr= getenv("REMOTE_ADDR");

  if (!remote_addr)
    remote_addr= "";
  UdmVarListAddStr(&query->Conf->Vars, "IP", remote_addr);

  for (n= 0; n < ndbs; n++)
  {
    UDM_DB *db= &query->Conf->dbl.db[n];

    if (UdmVarListFindStr(&db->Vars, "trackquery", NULL) != NULL)
      rc= UdmTrackSQL(query, Res, db);
  }
#endif
  return rc;
}


/*
  Build a result with the clone list of "Doc" by querying every
  database in Indexer->Conf->dbl: searchd databases through
  UdmCloneListSearchd(), all others through UdmCloneListSQL()
  (the latter only when built with HAVE_SQL).
  Iteration stops at the first call that does not return UDM_OK.
  Returns a newly created result when it has at least one row,
  otherwise the result is freed and NULL is returned.
*/
UDM_RESULT * UdmCloneList(UDM_AGENT * Indexer, UDM_DOCUMENT *Doc)
{
  size_t      n;
  size_t      ndbs= Indexer->Conf->dbl.nitems;
  int         rc= UDM_OK;
  UDM_RESULT *Clones= UdmResultInit(NULL);

  for (n= 0; n < ndbs && rc == UDM_OK; n++)
  {
    UDM_DB *db= &Indexer->Conf->dbl.db[n];

    if (db->DBDriver == UDM_DB_SEARCHD)
    {
      rc= UdmCloneListSearchd(Indexer, Doc, Clones, db);
    }
#ifdef HAVE_SQL
    else
    {
      rc= UdmCloneListSQL(Indexer, Doc, Clones, db);
    }
#endif
  }

  if (Clones->num_rows <= 0)
  {
    /* Nothing found: do not hand an empty result to the caller */
    UdmResultFree(Clones);
    return NULL;
  }
  return Clones;
}


/*
  Call UdmCheckUrlidSQL() with "id" for each database in
  Agent->Conf->dbl (only when built with HAVE_SQL), holding the
  configuration lock for the whole scan and the DB lock around each
  iteration.
  Stops at the first database that yields a non-zero value and returns
  that value; returns 0 when every database yielded 0.
*/
int UdmCheckUrlid(UDM_AGENT *Agent, urlid_t id)
{
  size_t n, ndbs;
  int    status= 0;

  UDM_GETLOCK(Agent, UDM_LOCK_CONF);
  ndbs= Agent->Conf->dbl.nitems;

  for (n= 0; n < ndbs && status == 0; n++)
  {
    UDM_GETLOCK(Agent, UDM_LOCK_DB);
#ifdef HAVE_SQL
    status= UdmCheckUrlidSQL(Agent, &Agent->Conf->dbl.db[n], id);
#endif
    UDM_RELEASELOCK(Agent, UDM_LOCK_DB);
  }
  UDM_RELEASELOCK(Agent, UDM_LOCK_CONF);
  return status;
}


/********************************************************/


/*
  Zero-fill the database list structure.
  Returns the same pointer that was passed in.
*/
UDM_DBLIST * UdmDBListInit(UDM_DBLIST * List)
{
  memset(List, 0, sizeof(*List));
  return List;
}

/*
  Append a database described by the DBAddr string "addr" to the list.

  The array is grown by one element, the new element is initialized with
  UdmDBInit() and configured with UdmDBSetAddr(). The item counter is
  incremented only when UdmDBSetAddr() succeeds.

  Returns UDM_OK on success, or UDM_ERROR / the UdmDBSetAddr() status on
  failure (note: the return type is size_t, but the value is a status
  code, not a count). If the array cannot be grown, the list is left
  unchanged.
*/
size_t UdmDBListAdd(UDM_DBLIST *List, const char * addr, int mode)
{
  UDM_DB *grown;
  UDM_DB *db;
  int     res;

  /* Keep the old pointer until realloc is known to have succeeded */
  grown= (UDM_DB*) UdmRealloc(List->db, (List->nitems + 1) * sizeof(UDM_DB));
  if (!grown)
    return UDM_ERROR;
  List->db= grown;

  db= &grown[List->nitems];
  UdmDBInit(db);
  res= UdmDBSetAddr(db, addr, mode);
  if (res == UDM_OK)
    List->nitems++;
  return res;
}

/*
  Release every database in the list with UdmDBFree(), free the array
  itself and reset the list structure with UdmDBListInit().
*/
void UdmDBListFree(UDM_DBLIST *List)
{
  UDM_DB *items= List->db;
  size_t  n= 0;

  while (n < List->nitems)
  {
    UdmDBFree(&items[n]);
    n++;
  }
  UDM_FREE(List->db);
  UdmDBListInit(List);
}

/********************* MultiCache stuff *************************/
/*
  Initialize a multi-mode word cache.
  When "cache" is NULL a new structure is allocated and marked
  (free=1) so that UdmMultiCacheFree() releases it; otherwise the
  caller's structure is used and marked free=0.
  Record and URL counters are zeroed and the URL arrays of all
  MULTI_DICTS+1 tables are set to empty.
  Returns the cache, or NULL if allocation failed.
*/
UDM_MULTI_CACHE *UdmMultiCacheInit (UDM_MULTI_CACHE *cache)
{
  size_t t;

  if (cache != NULL)
  {
    cache->free= 0;
  }
  else
  {
    cache= UdmMalloc(sizeof(UDM_MULTI_CACHE));
    if (cache == NULL)
      return NULL;
    cache->free= 1;
  }

  cache->nrecs= 0;
  cache->nurls= 0;
  cache->urls= NULL;

  for (t= 0; t <= MULTI_DICTS; t++)
  {
    UDM_MULTI_CACHE_TABLE *table= &cache->tables[t];
    table->nurls= 0;
    table->urls= NULL;
  }

  return cache;
}

/*
  Release everything held by a multi-mode word cache:
  for every table, every URL, every section and every word, the word
  text and its coordinate array are freed, then the containing arrays.
  Counters are reset to zero. The structure itself is freed only when
  it was allocated by UdmMultiCacheInit() (cache->free is set).
  A NULL cache is ignored.
*/
void UdmMultiCacheFree (UDM_MULTI_CACHE *cache)
{
  size_t ti, ui, si, wi;

  if (cache == NULL)
    return;

  for (ti= 0; ti <= MULTI_DICTS; ti++)
  {
    UDM_MULTI_CACHE_TABLE *tbl= &cache->tables[ti];

    for (ui= 0; ui < tbl->nurls; ui++)
    {
      UDM_MULTI_CACHE_URL *doc= &tbl->urls[ui];

      for (si= 0; si < doc->nsections; si++)
      {
        UDM_MULTI_CACHE_SECTION *sec= &doc->sections[si];

        for (wi= 0; wi < sec->nwords; wi++)
        {
          UdmFree(sec->words[wi].word);
          UdmFree(sec->words[wi].intags);
        }
        UdmFree(sec->words);
      }
      UdmFree(doc->sections);
    }
    UdmFree(tbl->urls);
    tbl->nurls= 0;
    tbl->urls= NULL;
  }

  UdmFree(cache->urls);
  cache->nurls= 0;
  cache->urls= NULL;
  cache->nrecs= 0;

  if (cache->free)
    UdmFree(cache);
}

/*
  Write the word cache out for every database that works in
  UDM_DBMODE_MULTI mode, using UdmWordCacheWrite() (only when built
  with HAVE_SQL). Databases in other modes are skipped.

  The configuration lock is held for the whole scan and the DB lock
  around each database. On the first failure the database error text is
  logged, both locks are released and the failing status is returned.
  Returns UDM_OK otherwise.
*/
int UdmWordCacheFlush (UDM_AGENT *Indexer)
{
#ifdef HAVE_SQL
  size_t i;
  UDM_GETLOCK(Indexer, UDM_LOCK_CONF);
  for (i= 0; i < Indexer->Conf->dbl.nitems; i++)
  {
    int rc= UDM_OK;
    UDM_DB *db= &Indexer->Conf->dbl.db[i];
    UDM_GETLOCK(Indexer, UDM_LOCK_DB);
    if (db->DBMode == UDM_DBMODE_MULTI)
    {
      rc= UdmWordCacheWrite(Indexer, db, 0);
    }
    if (rc != UDM_OK)
    {
      /* Log while the DB lock is still held, as errstr belongs to db */
      UdmLog(Indexer, UDM_LOG_ERROR, "%s", db->errstr);
    }
    UDM_RELEASELOCK(Indexer, UDM_LOCK_DB);
    if (rc != UDM_OK)
    {
      /* Do not leave the configuration lock held on the error path */
      UDM_RELEASELOCK(Indexer, UDM_LOCK_CONF);
      return rc;
    }
  }
  UDM_RELEASELOCK(Indexer, UDM_LOCK_CONF);
#endif
  return UDM_OK;
}

/*
  Run UdmConvert2BlobSQL() for every database in Indexer->Conf->dbl
  (only when built with HAVE_SQL), taking the DB lock around each call.
  Start and finish messages (with elapsed time) are written to the log.
  On the first failure the database error text is logged and the
  failing status is returned immediately, without the finish message.
  Returns UDM_OK otherwise.
*/
int UdmMulti2Blob (UDM_AGENT *Indexer)
{
#ifdef HAVE_SQL
  unsigned long elapsed;
  size_t        n= 0;

  UdmLog(Indexer,UDM_LOG_ERROR,"Converting to blob");
  elapsed= UdmStartTimer();

  while (n < Indexer->Conf->dbl.nitems)
  {
    UDM_DB *db= &Indexer->Conf->dbl.db[n];
    int     status;

    UDM_GETLOCK(Indexer, UDM_LOCK_DB);
    status= UdmConvert2BlobSQL(Indexer, db);
    UDM_RELEASELOCK(Indexer, UDM_LOCK_DB);

    if (status != UDM_OK)
    {
      UdmLog(Indexer,UDM_LOG_ERROR,"%s",db->errstr);
      return status;
    }
    n++;
  }

  elapsed= UdmStartTimer() - elapsed;
  UdmLog(Indexer,UDM_LOG_ERROR,"Converting to blob finished\t%.2f",(float)elapsed/1000);
#endif
  return UDM_OK;
}


/*
  Run UdmBlobWriteURL() on table "bdict" for every database in
  Indexer->Conf->dbl (only when built with HAVE_SQL), taking the DB
  lock around each call. The per-database "deflate" variable (default 0)
  is passed through to UdmBlobWriteURL().
  Start and finish messages (with elapsed time) are written to the log.
  On the first failure the database error text is logged and the
  failing status is returned immediately.
  Returns UDM_OK otherwise.
*/
int UdmRewriteURL(UDM_AGENT *Indexer)
{
#ifdef HAVE_SQL
  size_t i;
  unsigned long ticks;

  /* Messages name this operation rather than the blob conversion */
  UdmLog(Indexer,UDM_LOG_ERROR,"Rewriting URL data");
  ticks=UdmStartTimer();

  for (i = 0; i < Indexer->Conf->dbl.nitems; i++)
  {
    int rc;
    UDM_DB *db = &Indexer->Conf->dbl.db[i];
    UDM_GETLOCK(Indexer, UDM_LOCK_DB);
    rc= UdmBlobWriteURL(Indexer, db, "bdict",
          UdmVarListFindBool(&db->Vars, "deflate", 0));
    UDM_RELEASELOCK(Indexer, UDM_LOCK_DB);
    if (rc != UDM_OK)
    {
      UdmLog(Indexer,UDM_LOG_ERROR,"%s",db->errstr); 
      return rc;
    }
  }

  ticks=UdmStartTimer()-ticks;
  UdmLog(Indexer,UDM_LOG_ERROR,"Rewriting URL data finished\t%.2f",(float)ticks/1000);
#endif
  return UDM_OK;
}


/*
  Run UdmBlobWriteLimits() on table "bdict" for every database in
  Indexer->Conf->dbl (only when built with HAVE_SQL), taking the DB
  lock around each call. The per-database "deflate" variable (default 0)
  is passed through.
  Start and finish messages (with elapsed time) are written to the log.
  On the first failure the database error text is logged and the
  failing status is returned immediately.
  Returns UDM_OK otherwise.
*/
int UdmRewriteLimits(UDM_AGENT *Indexer)
{
#ifdef HAVE_SQL
  unsigned long elapsed;
  size_t        n= 0;

  UdmLog(Indexer,UDM_LOG_ERROR,"Rewritting limits");
  elapsed= UdmStartTimer();

  while (n < Indexer->Conf->dbl.nitems)
  {
    UDM_DB *db= &Indexer->Conf->dbl.db[n];
    int     status;

    UDM_GETLOCK(Indexer, UDM_LOCK_DB);
    status= UdmBlobWriteLimits(Indexer, db, "bdict",
                               UdmVarListFindBool(&db->Vars, "deflate", 0));
    UDM_RELEASELOCK(Indexer, UDM_LOCK_DB);

    if (status != UDM_OK)
    {
      UdmLog(Indexer,UDM_LOG_ERROR,"%s",db->errstr);
      return status;
    }
    n++;
  }

  elapsed= UdmStartTimer() - elapsed;
  UdmLog(Indexer,UDM_LOG_ERROR,"Rewritting limits\t%.2f",(float)elapsed/1000);
#endif
  return UDM_OK;
}


/*
  Run UdmExportSQL() for every database in Indexer->Conf->dbl
  (only when built with HAVE_SQL), taking the DB lock around each call.
  On the first failure the database error text is logged and the loop
  stops; the finish message with elapsed time is logged in either case.
  Returns the status of the last call, or UDM_OK when none was made.
*/
int UdmExport (UDM_AGENT *Indexer)
{
  int rc= UDM_OK;
#ifdef HAVE_SQL
  unsigned long elapsed;
  size_t        n;

  UdmLog(Indexer,UDM_LOG_ERROR,"Starting export");
  elapsed= UdmStartTimer();

  for (n= 0; n < Indexer->Conf->dbl.nitems; n++)
  {
    UDM_DB *db= &Indexer->Conf->dbl.db[n];

    UDM_GETLOCK(Indexer, UDM_LOCK_DB);
    rc= UdmExportSQL(Indexer, db);
    UDM_RELEASELOCK(Indexer, UDM_LOCK_DB);

    if (rc == UDM_OK)
      continue;

    UdmLog(Indexer,UDM_LOG_ERROR,"%s",db->errstr);
    break;
  }

  elapsed= UdmStartTimer() - elapsed;
  UdmLog(Indexer,UDM_LOG_ERROR,"Export finished\t%.2f",(float)elapsed/1000);
#endif
  return rc;
}


/*
  Sort comparator for uint4 values, ascending order.
  Returns 1, -1 or 0 when *a is greater than, less than or equal to *b.
*/
static int mccmpword(uint4 *a, uint4 *b)
{
  return (*a > *b) - (*a < *b);
}


/*
  Append "coord" to the coordinate array of a cached word and re-sort
  the array in ascending order.
  Returns 1 on success, 0 when "cache" is NULL or the array cannot be
  grown (in which case the existing array is left untouched).
*/
static size_t UdmMultiCacheAddWord (UDM_MULTI_CACHE_WORD *cache, uint4 coord)
{
  uint4 *grown;

  if (cache == NULL)
    return 0;

  grown= UdmRealloc(cache->intags, (cache->nintags + 1) * sizeof(uint4));
  if (grown == NULL)
    return 0;

  grown[cache->nintags]= coord;
  cache->intags= grown;
  cache->nintags++;
  UdmSort(cache->intags, cache->nintags, sizeof(uint4), (udm_qsort_cmp)mccmpword);
  return 1;
}

/*
  Add one occurrence of "word" to a cached section.
  The word is looked up by exact string comparison; when it is not yet
  present a new entry with a copy of the word text is appended.
  The coordinate stored is the low 21 bits of word->pos.
  Returns the result of UdmMultiCacheAddWord() (1 on success), or 0
  when "cache" is NULL or the word array cannot be grown.
*/
static size_t UdmMultiCacheAddSection (UDM_MULTI_CACHE_SECTION *cache, UDM_WORD *word)
{
  size_t idx= 0;
  uint4  coord= word->pos & 0x1FFFFF;

  if (cache == NULL)
    return 0;

  /* Linear search for an existing entry with the same text */
  while (idx < cache->nwords && strcmp(cache->words[idx].word, word->word) != 0)
    idx++;

  if (idx == cache->nwords)
  {
    UDM_MULTI_CACHE_WORD *entry;
    UDM_MULTI_CACHE_WORD *grown=
      UdmRealloc(cache->words, (cache->nwords + 1) * sizeof(UDM_MULTI_CACHE_WORD));

    if (grown == NULL)
      return 0;
    cache->words= grown;

    entry= &cache->words[cache->nwords];
    entry->word= (char *)UdmStrdup(word->word);
    entry->nintags= 0;
    entry->intags= NULL;
    cache->nwords++;
  }

  return UdmMultiCacheAddWord(&cache->words[idx], coord);
}

/*
  Add one occurrence of "word" to a cached URL.
  The section is selected by word->secno (taken as unsigned char); a new
  empty section is appended when that number is not yet present.
  Returns the result of UdmMultiCacheAddSection(), or 0 when "cache"
  is NULL or the section array cannot be grown.
*/
static size_t UdmMultiCacheAddURL (UDM_MULTI_CACHE_URL *cache, UDM_WORD *word)
{
  size_t        idx= 0;
  unsigned char secno= word->secno;

  if (cache == NULL)
    return 0;

  while (idx < cache->nsections && cache->sections[idx].secno != secno)
    idx++;

  if (idx == cache->nsections)
  {
    UDM_MULTI_CACHE_SECTION *entry;
    UDM_MULTI_CACHE_SECTION *grown=
      UdmRealloc(cache->sections, (cache->nsections + 1) * sizeof(UDM_MULTI_CACHE_SECTION));

    if (grown == NULL)
      return 0;
    cache->sections= grown;

    entry= &cache->sections[cache->nsections];
    entry->secno= secno;
    entry->nwords= 0;
    entry->words= NULL;
    cache->nsections++;
  }

  return UdmMultiCacheAddSection(&cache->sections[idx], word);
}

/*
  Add one occurrence of "word" for document "url_id" to a cache table.
  The URL entry is looked up by id; when absent, a new entry with the
  given "reindex" flag and no sections is appended. The flag of an
  already existing entry is not changed.
  Returns the result of UdmMultiCacheAddURL(), or 0 when "cache" is
  NULL or the URL array cannot be grown.
*/
static size_t UdmMultiCacheAddTable (UDM_MULTI_CACHE_TABLE *cache, urlid_t url_id, unsigned char reindex, UDM_WORD *word)
{
  size_t idx= 0;

  if (cache == NULL)
    return 0;

  while (idx < cache->nurls && cache->urls[idx].url_id != url_id)
    idx++;

  if (idx == cache->nurls)
  {
    UDM_MULTI_CACHE_URL *entry;
    UDM_MULTI_CACHE_URL *grown=
      UdmRealloc(cache->urls, (cache->nurls + 1) * sizeof(UDM_MULTI_CACHE_URL));

    if (grown == NULL)
      return 0;
    cache->urls= grown;

    entry= &cache->urls[cache->nurls];
    entry->url_id= url_id;
    entry->reindex= reindex;
    entry->nsections= 0;
    entry->sections= NULL;
    cache->nurls++;
  }

  return UdmMultiCacheAddURL(&cache->urls[idx], word);
}

/*
  Add one word occurrence of document "url_id" to the multi cache.
  The destination table is chosen by the 32-bit hash of the word text
  masked with MULTI_DICTS. The record counter is incremented on every
  call with a non-NULL cache. When "reindex" is non-zero the URL id is
  also remembered in cache->urls (once per id).
  Returns the result of UdmMultiCacheAddTable(), or 0 when "cache" is
  NULL or the URL id array cannot be grown.
*/
size_t UdmMultiCacheAdd (UDM_MULTI_CACHE *cache, urlid_t url_id, unsigned char reindex, UDM_WORD *word)
{
  udmhash32_t tableno= UdmStrHash32(word->word) & MULTI_DICTS;
  size_t      idx;

  if (cache == NULL)
    return 0;

  cache->nrecs++;

  if (reindex)
  {
    idx= 0;
    while (idx < cache->nurls && cache->urls[idx] != url_id)
      idx++;

    if (idx == cache->nurls)
    {
      urlid_t *grown= UdmRealloc(cache->urls, (cache->nurls + 1) * sizeof(urlid_t));

      if (grown == NULL)
        return 0;
      grown[cache->nurls]= url_id;
      cache->urls= grown;
      cache->nurls++;
    }
  }

  return UdmMultiCacheAddTable(&cache->tables[tableno], url_id, reindex, word);
}

/******************** BlobCache stuff *******************/

/*
  Initialize a blob cache.
  When "cache" is NULL a new structure is allocated and marked
  (free=1) so that UdmBlobCacheFree() releases it; otherwise the
  caller's structure is used and marked free=0.
  Error, used-word and allocated-word counters are zeroed and the
  word array is set to NULL.
  Returns the cache, or NULL if allocation failed.
*/
UDM_BLOB_CACHE *UdmBlobCacheInit (UDM_BLOB_CACHE *cache)
{
  if (cache != NULL)
  {
    cache->free= 0;
  }
  else
  {
    cache= UdmMalloc(sizeof(UDM_BLOB_CACHE));
    if (cache == NULL)
      return NULL;
    cache->free= 1;
  }

  cache->words= NULL;
  cache->awords= 0;
  cache->nwords= 0;
  cache->errors= 0;

  return cache;
}

/*
  Release everything held by a blob cache.
  Word buffers are freed only for entries whose "freeme" flag is set
  (entries added with UdmBlobCacheAdd2() reference caller-owned memory).
  Counters are reset; the structure itself is freed only when it was
  allocated by UdmBlobCacheInit() (cache->free is set).
  A NULL cache is ignored, matching UdmMultiCacheFree() and
  UdmWordCacheFree().
*/
void UdmBlobCacheFree (UDM_BLOB_CACHE *cache)
{
  size_t i;

  if (! cache) return;

  for (i= 0; i < cache->nwords; i++)
  {
    if (cache->words[i].freeme)
      UDM_FREE(cache->words[i].word);
  }

  UdmFree(cache->words);
  cache->errors= 0;
  cache->nwords= 0;
  cache->awords= 0;
  cache->words= NULL;

  if (cache->free) UdmFree(cache);
}


/*
  Validate the arguments of UdmBlobCacheAdd()/UdmBlobCacheAdd2().
  url_id, secno, word, nintags and intag are checked in that order;
  for the first one that is zero/NULL a message is written to stderr.
  "intaglen" is not checked.
  Returns 1 when a value is missing, 0 when all are present.
*/
static inline int
UdmBlobCacheCheckValue(urlid_t url_id, unsigned char secno,
                       const char *word, size_t nintags,
                       const char *intag, size_t intaglen)
{
  const char *complaint= NULL;

  if (! url_id)
    complaint= "url_id variable empty\n";
  else if (! secno)
    complaint= "secno variable empty\n";
  else if (! word)
    complaint= "word variable empty\n";
  else if (! nintags)
    complaint= "nintags variable empty\n";
  else if (! intag)
    complaint= "intag variable empty\n";

  if (complaint == NULL)
    return 0;

  fputs(complaint, stderr);
  return 1;
}


/*
  Make sure there is room for at least one more word in the blob cache.
  When the array is full it is grown by 256 entries.
  On allocation failure the error counter is incremented and a message
  is printed to stderr for the first 9 failures and then for every
  2048th one; the existing array is left untouched.
  Returns 0 on success, 1 on allocation failure.
*/
static inline int
UdmBlobCacheRealloc(UDM_BLOB_CACHE *cache)
{
  if (cache->nwords == cache->awords)
  {
    UDM_BLOB_CACHE_WORD *tmp;
    size_t nbytes= (cache->awords + 256) * sizeof(UDM_BLOB_CACHE_WORD);
    tmp= UdmRealloc(cache->words, nbytes);
    if (!tmp)
    {
      cache->errors++;
      if (cache->errors < 10 || (cache->errors % 2048) == 0)
      {
        /*
          The arguments are cast to unsigned long to match %lu:
          passing size_t values to %d is undefined behaviour.
        */
        fprintf(stderr, "BlobCacheRealloc: failed %lu times: %lu bytes, %lu records\n",
                (unsigned long) cache->errors, (unsigned long) nbytes,
                (unsigned long) (cache->awords + 256));
      }
      return 1;
    }
    cache->words = tmp;
    cache->awords += 256;
  }
  return 0;
}


/*
  Append a word record to the blob cache, copying the data.

  One buffer is allocated that holds the word text with its terminating
  NUL, followed by "intaglen" bytes of "intag" and one more NUL byte;
  W->word points to the start of the buffer and W->intags into it.
  The record is marked freeme=1, so UdmBlobCacheFree() releases it.

  Returns 1 on success. Returns 0 when an argument is rejected by
  UdmBlobCacheCheckValue(), when the record array cannot be grown, or
  when the buffer cannot be allocated; in these cases the cache is not
  changed (apart from the error counter kept by UdmBlobCacheRealloc()).
*/
size_t UdmBlobCacheAdd (UDM_BLOB_CACHE *cache, urlid_t url_id,
                        unsigned char secno, const char *word,
                        size_t nintags, const char *intag, size_t intaglen)
{
  size_t word_len;
  char *buf;
  UDM_BLOB_CACHE_WORD *W;
  
  if (UdmBlobCacheCheckValue(url_id, secno, word, nintags, intag, intaglen) ||
      UdmBlobCacheRealloc(cache))
    return 0;
  
  word_len= strlen(word);
  /* Allocate before touching the slot, so a failure leaves it unused */
  buf= UdmMalloc(word_len + intaglen + 2);
  if (!buf)
    return 0;

  W= &cache->words[cache->nwords];
  W->secno= secno;
  W->url_id= url_id;
  W->nintags= nintags;
  W->ntaglen= intaglen;
  W->word= buf;
  W->intags= W->word + word_len + 1;
  memcpy(W->word, word, word_len + 1);
  memcpy(W->intags, intag, intaglen);
  W->intags[intaglen]= '\0';
  W->freeme= 1;

  cache->nwords++;

  return(1);
}


/*
  Append a word record to the blob cache without copying the data:
  the record stores the "word" and "intag" pointers as given and is
  marked freeme=0, so UdmBlobCacheFree() will not release them.
  Returns 1 on success, 0 when an argument is rejected by
  UdmBlobCacheCheckValue() or the record array cannot be grown.
*/
size_t UdmBlobCacheAdd2(UDM_BLOB_CACHE *cache, urlid_t url_id,
                        unsigned char secno, char *word,
                        size_t nintags, char *intag, size_t intaglen)
{
  UDM_BLOB_CACHE_WORD *slot;

  if (UdmBlobCacheCheckValue(url_id, secno, word, nintags, intag, intaglen))
    return 0;
  if (UdmBlobCacheRealloc(cache))
    return 0;

  slot= &cache->words[cache->nwords];
  slot->url_id= url_id;
  slot->secno= secno;
  slot->word= word;
  slot->intags= intag;
  slot->nintags= nintags;
  slot->ntaglen= intaglen;
  slot->freeme= 0;

  cache->nwords++;
  return 1;
}

/*
  Sort comparator for blob cache records.
  Orders by word text (strcmp), then by section number,
  then by url_id, all ascending.
*/
static int bccmpwrd (UDM_BLOB_CACHE_WORD *s1, UDM_BLOB_CACHE_WORD *s2)
{
  int diff= strcmp(s1->word, s2->word);

  if (diff)
    return diff;

  diff= s1->secno - s2->secno;
  if (diff)
    return diff;

  if (s1->url_id > s2->url_id)
    return 1;
  if (s1->url_id < s2->url_id)
    return -1;
  return 0;
}

/*
  Sort the blob cache records with bccmpwrd()
  (word, section number, url_id). Does nothing for an empty cache.
*/
void UdmBlobCacheSort (UDM_BLOB_CACHE *cache)
{
  if (!cache->nwords)
    return;

  UdmSort(cache->words, cache->nwords,
          sizeof(UDM_BLOB_CACHE_WORD), (udm_qsort_cmp)bccmpwrd);
}

/*
  Initialize a word cache.
  When "cache" is NULL a new structure is allocated and marked
  (free=1) so that UdmWordCacheFree() releases it; otherwise the
  caller's structure is used and marked free=0.
  The word and URL arrays are set to empty and the byte counter
  starts at sizeof(UDM_WORD_CACHE).
  Returns the cache, or NULL if allocation failed.
*/
UDM_WORD_CACHE *UdmWordCacheInit (UDM_WORD_CACHE *cache)
{
  if (cache != NULL)
  {
    cache->free= 0;
  }
  else
  {
    cache= UdmMalloc(sizeof(UDM_WORD_CACHE));
    if (cache == NULL)
      return NULL;
    cache->free= 1;
  }

  cache->nbytes= sizeof(UDM_WORD_CACHE);

  cache->words= NULL;
  cache->nwords= 0;
  cache->awords= 0;

  cache->urls= NULL;
  cache->nurls= 0;
  cache->aurls= 0;

  return cache;
}

/*
  Release everything held by a word cache: the text of every word,
  the word array and the URL array. Counters are reset and the byte
  counter goes back to sizeof(UDM_WORD_CACHE). The structure itself is
  freed only when it was allocated by UdmWordCacheInit().
  A NULL cache is ignored.
*/
void UdmWordCacheFree (UDM_WORD_CACHE *cache)
{
  size_t n;

  if (cache == NULL)
    return;

  for (n= 0; n < cache->nwords; n++)
  {
    UDM_FREE(cache->words[n].word);
  }
  UDM_FREE(cache->words);
  UDM_FREE(cache->urls);

  cache->nwords= 0;
  cache->awords= 0;
  cache->nurls= 0;
  cache->aurls= 0;
  cache->nbytes= sizeof(UDM_WORD_CACHE);

  if (cache->free)
    UdmFree(cache);
}

/*
  Sort comparator for word cache records.
  Orders by seed, then section number, then url_id,
  then word text (strcmp), then position.
*/
static int wccmpwrd (UDM_WORD_CACHE_WORD *s1, UDM_WORD_CACHE_WORD *s2)
{
  int diff;

  diff= s1->seed - s2->seed;
  if (diff)
    return diff;

  diff= s1->secno - s2->secno;
  if (diff)
    return diff;

  if (s1->url_id > s2->url_id)
    return 1;
  if (s1->url_id < s2->url_id)
    return -1;

  diff= strcmp(s1->word, s2->word);
  if (diff)
    return diff;

  return s1->pos - s2->pos;
}

/*
  Sort the word cache records with wccmpwrd()
  (seed, section number, url_id, word, position).
*/
void UdmWordCacheSort (UDM_WORD_CACHE *cache)
{
  udm_qsort_cmp compare= (udm_qsort_cmp)wccmpwrd;

  UdmSort(cache->words, cache->nwords, sizeof(UDM_WORD_CACHE_WORD), compare);
}

/*
  Append a copy of word "W" for document "url_id" to the word cache.
  A word with NULL text is silently skipped (UDM_OK).
  The array grows in steps of 256 records. The stored section number is
  the low 8 bits of W->secno, the position the low 21 bits of W->pos,
  and the seed is the 32-bit hash of the word text masked with
  MULTI_DICTS. cache->nbytes is kept up to date with the memory used.
  Returns UDM_OK on success, UDM_ERROR on allocation failure.
*/
int UdmWordCacheAdd(UDM_WORD_CACHE *cache, urlid_t url_id, UDM_WORD *W)
{
  UDM_WORD_CACHE_WORD *slot;

  if (W->word == NULL)
    return UDM_OK;

  if (cache->nwords == cache->awords)
  {
    UDM_WORD_CACHE_WORD *grown=
      UdmRealloc(cache->words, (cache->awords + 256) * sizeof(UDM_WORD_CACHE_WORD));

    if (grown == NULL)
    {
      fprintf(stderr, "Realloc failed while adding word\n");
      return UDM_ERROR;
    }
    cache->words= grown;
    cache->awords+= 256;
    cache->nbytes+= sizeof(UDM_WORD_CACHE_WORD) * 256;
  }

  slot= &cache->words[cache->nwords];
  slot->word= UdmStrdup(W->word);
  if (slot->word == NULL)
    return UDM_ERROR;

  slot->url_id= url_id;
  slot->secno= W->secno & 0xFF;
  slot->pos= W->pos & 0x1FFFFF;
  slot->seed= UdmStrHash32(W->word) & MULTI_DICTS;

  cache->nwords++;
  cache->nbytes+= strlen(W->word) + 1;
  return UDM_OK;
}

/*
  Append "url_id" to the URL list of the word cache.
  The array grows in steps of 256 ids and cache->nbytes is increased
  accordingly. No check for duplicates is made.
  Returns UDM_OK on success, UDM_ERROR on allocation failure.
*/
int UdmWordCacheAddURL (UDM_WORD_CACHE *cache, urlid_t url_id)
{
  if (cache->nurls == cache->aurls)
  {
    urlid_t *grown= UdmRealloc(cache->urls, (cache->aurls + 256) * sizeof(urlid_t));

    if (grown == NULL)
    {
      fprintf(stderr, "Realloc failed while adding word\n");
      return UDM_ERROR;
    }
    cache->urls= grown;
    cache->aurls+= 256;
    cache->nbytes+= sizeof(urlid_t) * 256;
  }

  cache->urls[cache->nurls++]= url_id;
  return UDM_OK;
}
