/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <fcntl.h>
#include <signal.h>
#include <assert.h>

#ifdef WIN32
#include <process.h>
#endif

#ifdef HAVE_READLINE
#include <readline/readline.h>
#include <readline/history.h>
#endif

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#ifdef CHASEN
#include <chasen.h>
#endif

#ifdef MECAB
#include <mecab.h>
#endif

#include "udmsearch.h"
#include "udm_sqldbms.h"
#include "udm_uniconv.h"

/* This should be last include */
#ifdef DMALLOC
#include "dmalloc.h"
#endif

       unsigned int seconds =0; /* Seconds to sleep after each URL (-p option) */
       int flags    =0; /* Indexer flags, OR-ed together from command-line options */
       int total_threads=0; /* Total number of indexer threads */
       int sleep_threads=0; /* Number of sleeping threads */
       int next_thread  =1; /* Handle number for the next indexer agent */
       int max_index_time=-1; /* Indexing time limit in seconds (-c option); negative means no limit */
       int cur_url_number=0; /* Incremented by thread_main() after each UdmIndexNextURL() attempt */
static int log2stderr=1; /* Passed to UdmOpenLog(); cleared by the -l option */
static char cname[1024]=""; /* Path of the configuration file in use */
static int add_servers=UDM_FLAG_ADD_SERV; /* Config load flag; cleared by -C, -S, -I, -Q */
static int add_server_urls = UDM_FLAG_ADD_SERVURL; /* Config load flag; cleared by -q */
static int load_langmaps=UDM_FLAG_LOAD_LANGMAP; /* Config load flag; cleared by -C, -S, -I, -Q */
static int load_spells=UDM_FLAG_SPELL; /* Config load flag; cleared by -C, -S, -I, -Q */
static int warnings=1; /* Ask for confirmation before clearing the database; cleared by -w */
       int maxthreads=1; /* Number of indexer threads to run (-N option) */
       int indexing = 0;

/* Environment (configuration) shared by the main agent and the indexer threads */
static UDM_ENV Conf;

/* Per-thread indexer agents; UdmIndex() stores each agent at index (handle - 1) */
UDM_AGENT **ThreadIndexers;

#ifdef HAVE_PTHREAD

#ifdef WIN32

/* Win32 thread handles; the array is allocated and freed in UdmIndex() */
HANDLE *threads;

/* CallBack function for Thread information */
extern __C_LINK void __UDMCALL UdmShowInfo(UDM_AGENT* A, const char *state, const char* str);

#else

/* POSIX thread identifiers; the array is allocated and freed in UdmIndex() */
pthread_t *threads;

#endif

#endif


/*
  Display callback for the SQL monitor: prints a result set to prm->outfile
  as tab-separated text, one row per line.

  When UDM_SQLMON_DISPLAY_FIELDS is set in prm->flags a header line with the
  column names is printed first ("<NONAME>" when the result carries no field
  descriptions). A column whose prm->colflags[] entry has bit 0 set is printed
  as a hexadecimal dump prefixed with "0x"; only the first 10 columns are
  checked (presumably the size of colflags[] - TODO confirm).
  SQL NULL values are printed as the literal text NULL in both modes.

  Always returns UDM_OK. Without HAVE_SQL the function prints nothing.
*/
static int UdmDisplaySQLQuery(UDM_SQLMON_PARAM *prm, UDM_SQLRES *sqlres) {
     int       res = UDM_OK;
#ifdef HAVE_SQL     
     size_t         i,j;

     if (prm->flags & UDM_SQLMON_DISPLAY_FIELDS)
     {
          for (i=0;i<sqlres->nCols;i++){
               if(i>0)fprintf(prm->outfile,"\t");
               fprintf(prm->outfile,"%s",sqlres->Fields ? sqlres->Fields[i].sqlname : "<NONAME>");
               if(i+1==sqlres->nCols)fprintf(prm->outfile,"\n");
          }
     }
     
     for (i=0;i<sqlres->nRows;i++){
          for(j=0;j<sqlres->nCols;j++){
               const char *v=UdmSQLValue(sqlres,i,j);
               if(j>0)fprintf(prm->outfile,"\t");
               if (!v)
               {
                 /* NULL value: never dereference it, not even in hex mode */
                 fprintf(prm->outfile,"NULL");
               }
               else if (j < 10 && (prm->colflags[j] & 1))
               {
                 const char* s;
                 fprintf(prm->outfile,"0x");
                 for (s=v; s[0]; s++)
                   fprintf(prm->outfile,"%02X",(int)(unsigned char)s[0]);
               }
               else
               {
                 fprintf(prm->outfile,"%s",v);
               }
               if(j+1==sqlres->nCols)fprintf(prm->outfile,"\n");
          }
     }
     
#endif
     return res;
}

/*
  Line-input callback for the SQL monitor.

  Reads one line into str (capacity size bytes, always NUL-terminated on
  success) and returns str, or 0 on end of input.

  With HAVE_READLINE and an interactive stdin the line is read with
  readline(), which strips the trailing newline and returns a malloc()ed
  buffer; that buffer is copied (truncated if needed) and released here.
  Otherwise the prompt callback is invoked and the line is read with fgets(),
  which keeps the trailing newline.
*/
static char* sqlmongets(UDM_SQLMON_PARAM *prm, char *str, size_t size)
{
#ifdef HAVE_READLINE
  if ((prm->infile == stdin) && isatty(0))
  {
     char prompt[]="SQL>";
     char *line= readline(prompt);
     if (!line)
       return 0;
     
     if (*line) add_history(line);
     if (size > 0)
     {
       /* strncpy() does not terminate on truncation, so do it explicitly */
       strncpy(str, line, size - 1);
       str[size - 1]= '\0';
     }
     /* readline() hands over ownership of the buffer; add_history() keeps its own copy */
     free(line);
  }
  else
#endif
  {
    prm->prompt(prm, UDM_SQLMON_MSG_PROMPT, "SQL>");
    if(!fgets(str, (int) size, prm->infile))
      return 0;
  }
  return str;
}

/*
  Prompt/message callback for the SQL monitor: writes msg verbatim to
  prm->outfile. The message type is not used. Always returns UDM_OK.
*/
static int sqlmonprompt(UDM_SQLMON_PARAM *prm, int msqtype, const char *msg)
{
  FILE *out= prm->outfile;

  (void) msqtype;
  fprintf(out, "%s", msg);
  return UDM_OK;
}

/*
  Map an indexer command to the name used as the prefix of the SQL script
  file in CreateOrDrop(): "create" or "drop".
  Any other command yields an empty string.
*/
__C_LINK /*static*/ const char* __UDMCALL UdmIndCmdStr(enum udm_indcmd cmd)
{
  const char *name= "";

  switch(cmd)
  {
    case UDM_IND_CREATE: name= "create"; break;
    case UDM_IND_DROP:   name= "drop";   break;
    default: break;
  }
  return name;
}



/*
  Run the "create" or "drop" SQL script against every configured database.

  For each database the script path is built as
    <ShareDir>/<dbtype>/<cmd>.<dbmode>.sql
  and fed to UdmSQLMonitor(), which reads it through sqlmongets() and reports
  through UdmDisplaySQLQuery()/sqlmonprompt(). A per-database summary of the
  query counters is printed to stdout.

  Returns UDM_ERROR (with the message in A->Conf->errstr) as soon as a script
  file cannot be opened, UDM_OK otherwise.
*/
static int CreateOrDrop(UDM_AGENT *A, enum udm_indcmd cmd)
{
  const char *sharedir= UdmVarListFindStr(&Conf.Vars,"ShareDir",UDM_SHARE_DIR);
  UDM_DBLIST *dblist= &A->Conf->dbl;
  size_t dbnum= 0;

  while (dbnum < dblist->nitems)
  {
    char script[1024];
    UDM_SQLMON_PARAM mon;
    UDM_DB *db= &dblist->db[dbnum];
    FILE *in;

    udm_snprintf(script,sizeof(script),"%s%s%s%s%s.%s.sql",
      sharedir,UDMSLASHSTR, UdmDBTypeToStr(db->DBType),UDMSLASHSTR,
      UdmIndCmdStr(cmd),UdmDBModeToStr(db->DBMode));
    printf("'%s' dbtype=%d dbmode=%d\n",script,db->DBType,db->DBMode);

    in= fopen(script,"r");
    if (in == NULL)
    {
      sprintf(A->Conf->errstr,"Can't open file '%s'",script);
      return UDM_ERROR;
    }

    /* Direct the monitor at the database whose script is being run */
    dblist->currdbnum= dbnum;

    bzero((void*)&mon,sizeof(mon));
    mon.infile= in;
    mon.outfile= stdout;
    mon.flags= UDM_SQLMON_DISPLAY_FIELDS;
    mon.gets= sqlmongets;
    mon.display= UdmDisplaySQLQuery;
    mon.prompt= sqlmonprompt;
    UdmSQLMonitor(A, A->Conf,&mon);

    printf("%d queries sent, %d succeeded, %d failed\n",
      mon.nqueries, mon.ngood, mon.nbad);
    fclose(in);
    dbnum++;
  }
  return UDM_OK;
}

/*
  Print database statistics (documents per HTTP status, expired and total)
  to stdout.

  The reference time comes from the "stat_time" variable (-j option):
    - an absolute time "YYYY-MM[-DD[ HH[:MM[:SS]]]]", tried from the most
      to the least specific format, or
    - a period accepted by Udm_dp2time_t(), used as an offset from now, or
    - the current time when neither form applies.

  Returns the result of UdmStatAction(). The table is printed in any case.
*/
static int ShowStatistics(UDM_AGENT *Indexer){
     int       res;
     struct tm tm;
     const char *stat_time;
     char sbuf[32];
     UDM_STATLIST   Stats;
     size_t         snum;
     UDM_STAT  Total;
     
     bzero((void*)&Total, sizeof(Total));
     /* Start from an empty list so the loop and UDM_FREE() below are safe
        even if UdmStatAction() fails before filling it in */
     bzero((void*)&Stats, sizeof(Stats));
     Stats.time = time(NULL);
     stat_time = UdmVarListFindStr(&Conf.Vars, "stat_time", "0");
     bzero(&tm, sizeof(tm));

     if (stat_time && (
	    strptime(stat_time, "%Y-%m-%d %H:%M:%S", &tm) ||
	    strptime(stat_time, "%Y-%m-%d %H:%M", &tm) ||
	    strptime(stat_time, "%Y-%m-%d %H", &tm) ||
	    strptime(stat_time, "%Y-%m-%d", &tm) ||
	    strptime(stat_time, "%Y-%m", &tm))) {

	    /* "%Y-%m" leaves the day at 0, which mktime() would normalize to
	       the last day of the previous month */
	    if (tm.tm_mday == 0) tm.tm_mday = 1;
	    /* Let mktime() decide whether DST is in effect for that date */
	    tm.tm_isdst = -1;
    	    Stats.time = mktime(&tm);
     } else if (stat_time && (Stats.time = Udm_dp2time_t(stat_time)) >= 0) {
    	    Stats.time += time(NULL);
	    localtime_r(&Stats.time, &tm);
     } else {
    	    Stats.time = time(NULL);
	    localtime_r(&Stats.time, &tm);
     }

     strftime(sbuf, sizeof(sbuf), "%Y-%m-%d %H:%M:%S", &tm);
     res=UdmStatAction(Indexer,&Stats);

     printf("\n          Database statistics [%s]\n\n", sbuf);
     printf("%10s %10s %10s\n","Status","Expired","Total");
     printf("   -----------------------------\n");
     for(snum=0;snum<Stats.nstats;snum++){
          UDM_STAT  *S=&Stats.Stat[snum];
          printf("%10d %10d %10d %s\n",S->status,S->expired,S->total,UdmHTTPErrMsg(S->status));
          Total.expired+=S->expired;
          Total.total+=S->total;
     }
     printf("   -----------------------------\n");
     printf("%10s %10d %10d\n","Total",Total.expired,Total.total);
     printf("\n");
     UDM_FREE(Stats.Stat);
     return(res);
}

/*
  Referer callback registered with UdmSetRefProc(): prints one
  "<code> <url> <referer>" line to stdout per invocation.
*/
static void UdmRefProc(int code, const char *url, const char * ref){
     fprintf(stdout, "%d %s %s\n", code, url, ref);
}

/*
  Print a heading and run the UDM_URL_ACTION_REFERERS action.
  Returns the result of UdmURLAction().
*/
__C_LINK static int __UDMCALL ShowReferers(UDM_AGENT * Indexer){
     fputs("\n          URLs and referers \n\n", stdout);
     return UdmURLAction(Indexer, NULL, UDM_URL_ACTION_REFERERS);
}

#undef THINFO_TEST
#ifdef THINFO_TEST
/* CallBack function for Thread information */
/*void UdmShowInfo(int handle,char *state, char* str){
     printf("%d %s %s\n",handle,state,str);
}*/
#endif


/*
  Sort comparator for UDM_CHARSET records: orders by charset group name,
  then by charset name, both case-insensitively.
*/
static int cmpgrp(const void *v1, const void *v2){
     const UDM_CHARSET *lhs = (const UDM_CHARSET *) v1;
     const UDM_CHARSET *rhs = (const UDM_CHARSET *) v2;
     int bygroup = strcasecmp(UdmCsGroup(lhs), UdmCsGroup(rhs));

     if (bygroup != 0)
          return bygroup;
     return strcasecmp(lhs->name, rhs->name);
}

/*
  Print the compiled-in charsets to stderr, grouped by charset family.

  The charset table is walked starting at UdmGetCharSetByID(0) until an entry
  without a name; entries whose family is UDM_CHARSET_UNKNOWN (not compiled
  in) are skipped. The remaining entries are copied, sorted with cmpgrp() and
  printed with one heading per family.
*/
static void display_charsets(void){
     UDM_CHARSET *cs=NULL;
     UDM_CHARSET c[100];
     const size_t cmax=sizeof(c)/sizeof(c[0]);
     size_t i=0;
     size_t n=0;
     int family=-1;
     
     /* Stop at the capacity of c[] instead of writing past its end */
     for(cs=UdmGetCharSetByID(0) ; cs && cs->name && n<cmax ; cs++){
          /* Skip not compiled charsets */
          if(cs->family != UDM_CHARSET_UNKNOWN)
               c[n++]=*cs;
     }
     /* n is a size_t; convert it to match the %d conversion */
     fprintf(stderr,"\n%d charsets available:\n",(int)n);

     UdmSort(c,n,sizeof(UDM_CHARSET),&cmpgrp);
     for(i=0;i<n;i++){
          if(family!=c[i].family){
               fprintf(stderr, "\n%19s : ", UdmCsGroup(&c[i]));
               family=c[i].family;
          }
          fprintf(stderr,"%s ",c[i].name);
     }
     fprintf(stderr,"\n");
}

/*
  Fill V with one variable per compile-time feature macro. Each variable is
  named after the macro and set to "yes" when the macro was defined at build
  time, "no" otherwise. Used by the -F option to report the build features.
*/
static void UdmFeatures(UDM_VARLIST *V){
     /* One entry per feature, in the order the variables are stored */
     static const struct {
          const char *name;
          const char *value;
     } features[] = {
#ifdef HAVE_PTHREAD
          { "HAVE_PTHREAD", "yes" },
#else
          { "HAVE_PTHREAD", "no" },
#endif
#ifdef USE_HTTPS
          { "USE_HTTPS", "yes" },
#else
          { "USE_HTTPS", "no" },
#endif
#ifdef DMALLOC
          { "DMALLOC", "yes" },
#else
          { "DMALLOC", "no" },
#endif
#ifdef EFENCE
          { "EFENCE", "yes" },
#else
          { "EFENCE", "no" },
#endif
#ifdef CHASEN
          { "CHASEN", "yes" },
#else
          { "CHASEN", "no" },
#endif
#ifdef MECAB
          { "MECAB", "yes" },
#else
          { "MECAB", "no" },
#endif
#ifdef HAVE_ZLIB
          { "HAVE_ZLIB", "yes" },
#else
          { "HAVE_ZLIB", "no" },
#endif
#ifdef USE_SYSLOG
          { "USE_SYSLOG", "yes" },
#else
          { "USE_SYSLOG", "no" },
#endif
#ifdef USE_PARSER
          { "USE_PARSER", "yes" },
#else
          { "USE_PARSER", "no" },
#endif
#ifdef USE_MP3
          { "USE_MP3", "yes" },
#else
          { "USE_MP3", "no" },
#endif
#ifdef USE_FILE
          { "USE_FILE", "yes" },
#else
          { "USE_FILE", "no" },
#endif
#ifdef USE_HTTP
          { "USE_HTTP", "yes" },
#else
          { "USE_HTTP", "no" },
#endif
#ifdef USE_FTP
          { "USE_FTP", "yes" },
#else
          { "USE_FTP", "no" },
#endif
#ifdef USE_NEWS
          { "USE_NEWS", "yes" },
#else
          { "USE_NEWS", "no" },
#endif
#ifdef HAVE_MYSQL
          { "HAVE_MYSQL", "yes" },
#else
          { "HAVE_MYSQL", "no" },
#endif
#ifdef HAVE_PGSQL
          { "HAVE_PGSQL", "yes" },
#else
          { "HAVE_PGSQL", "no" },
#endif
#ifdef HAVE_IODBC
          { "HAVE_IODBC", "yes" },
#else
          { "HAVE_IODBC", "no" },
#endif
#ifdef HAVE_UNIXODBC
          { "HAVE_UNIXODBC", "yes" },
#else
          { "HAVE_UNIXODBC", "no" },
#endif
#ifdef HAVE_DB2
          { "HAVE_DB2", "yes" },
#else
          { "HAVE_DB2", "no" },
#endif
#ifdef HAVE_SOLID
          { "HAVE_SOLID", "yes" },
#else
          { "HAVE_SOLID", "no" },
#endif
#ifdef HAVE_VIRT
          { "HAVE_VIRT", "yes" },
#else
          { "HAVE_VIRT", "no" },
#endif
#ifdef HAVE_EASYSOFT
          { "HAVE_EASYSOFT", "yes" },
#else
          { "HAVE_EASYSOFT", "no" },
#endif
#ifdef HAVE_SAPDB
          { "HAVE_SAPDB", "yes" },
#else
          { "HAVE_SAPDB", "no" },
#endif
#ifdef HAVE_IBASE
          { "HAVE_IBASE", "yes" },
#else
          { "HAVE_IBASE", "no" },
#endif
#ifdef HAVE_CTLIB
          { "HAVE_CTLIB", "yes" },
#else
          { "HAVE_CTLIB", "no" },
#endif
#ifdef HAVE_ORACLE8
          { "HAVE_ORACLE8", "yes" },
#else
          { "HAVE_ORACLE8", "no" },
#endif
#ifdef HAVE_CHARSET_big5
          { "HAVE_CHARSET_big5", "yes" },
#else
          { "HAVE_CHARSET_big5", "no" },
#endif
#ifdef HAVE_CHARSET_euc_kr
          { "HAVE_CHARSET_euc_kr", "yes" },
#else
          { "HAVE_CHARSET_euc_kr", "no" },
#endif
#ifdef HAVE_CHARSET_gb2312
          { "HAVE_CHARSET_gb2312", "yes" },
#else
          { "HAVE_CHARSET_gb2312", "no" },
#endif
#ifdef HAVE_CHARSET_japanese
          { "HAVE_CHARSET_japanese", "yes" },
#else
          { "HAVE_CHARSET_japanese", "no" },
#endif
#ifdef HAVE_CHARSET_gbk
          { "HAVE_CHARSET_gbk", "yes" },
#else
          { "HAVE_CHARSET_gbk", "no" },
#endif
#ifdef HAVE_CHARSET_gujarati
          { "HAVE_CHARSET_gujarati", "yes" },
#else
          { "HAVE_CHARSET_gujarati", "no" },
#endif
#ifdef HAVE_CHARSET_tscii
          { "HAVE_CHARSET_tscii", "yes" },
#else
          { "HAVE_CHARSET_tscii", "no" },
#endif
     };
     size_t idx;

     for (idx = 0; idx < sizeof(features) / sizeof(features[0]); idx++)
          UdmVarListReplaceStr(V, features[idx].name, features[idx].value);
}

#ifndef WIN32

/*
  Print the command-line help to stderr.

  The text is a single format string assembled from adjacent literals; the
  sections shown depend on HAVE_SQL, HAVE_PTHREAD and LOG_PERROR at build
  time. The three %s conversions receive PACKAGE, VERSION and UDM_DBTYPE,
  and "%%" prints a literal percent sign.

  level: when greater than 1 (e.g. -hh) the list of available charsets is
         printed as well, via display_charsets().

  Always returns 0.
*/
static int usage(int level){

     fprintf(stderr,
"\n\
indexer from %s-%s-%s\n\
http://www.mnogosearch.org/ (C)1998-2004, LavTech Corp.\n\
\n\
Usage: indexer [OPTIONS]  [configfile]\n\
\n\
Indexing options:\
"
#ifdef HAVE_SQL
"\n\
  -a              reindex all documents even if not expired (may be\n\
                  limited using -t, -u, -s, -c, -y and -f options)\n\
  -m              reindex expired documents even if not modified (may\n\
                  be limited using -t, -u, -c, -s, -y and -s options)\n\
  -e              index 'most expired' (oldest) documents first\n\
  -o              index documents with less depth (hops value) first\n\
  -r              do not try to reduce remote servers load by randomising\n\
                  url fetch list before indexing (recommended for very \n\
            big number of URLs)\n\
  -n n            index only n documents and exit\n\
  -c n            index only n seconds and exit\n\
  -q              quick startup (do not add Server URLs)\n\
"
#endif
"\n\
  -b              block starting more than one indexer instances\n\
  -i              insert new URLs (URLs to insert must be given using -u or -f)\n\
  -p n            sleep n seconds after each URL\n\
  -w              do not warn before clearing documents from database\n\
"
#ifdef HAVE_PTHREAD
"  -N n            run N threads\n\
"
#endif

#ifdef HAVE_SQL
"\n\
Subsection control options (may be combined):\n\
  -s status       limit indexer to documents matching status (HTTP Status code)\n\
  -t tag          limit indexer to documents matching tag\n\
  -g category     limit indexer to documents matching category\n\
  -y content-type limit indexer to documents matching content-type\n\
  -L language     limit indexer to documents matching language\n\
  -u pattern      limit indexer to documents with URLs matching pattern\n\
                  (supports SQL LIKE wildcard '%%')\n\
  -f filename     read URLs to be indexed/inserted/cleared from file (with -a\n\
                  or -C option, supports SQL LIKE wildcard '%%'; has no effect\n\
                  when combined with -m option)\n\
  -f -            Use STDIN instead of file as URL list\n\
"
#else
"\n\
URL options:\n\
  -u URL          insert URL at startup\n\
  -f filename     read URLs to be inserted from file\n\
"
#endif
"\n\
Logging options:\n\
"
#ifdef LOG_PERROR
"  -l              do not log to stdout/stderr\n\
"
#endif
"  -v n            verbose level, 0-5\n\
\n\
Misc. options:\n\
"
#ifdef HAVE_SQL

"  -C              clear database and exit\n\
  -S              print statistics and exit\n\
  -j t            set current time for statistic (use with -S),\n\
                  YYYY-MM[-DD[ HH[:MM[:SS]]]]\n\
	          or time offset, e.g. 1d12h (see Period in indexer.conf)\n\
  -I              print referers and exit\n\
  -R              calculate popularity rank\n\
  -Ecreate        create SQL table structure and exit\n\
  -Edrop          drop SQL table structure and exit\n\
"
#endif
"  -h,-?           print help page and exit\n\
  -hh             print more help and exit\n\
\n\
\n\
Please post bug reports and suggestions at http://www.mnogosearch.org/bugs/\n",
     PACKAGE,VERSION,UDM_DBTYPE);
     
     if(level>1)display_charsets();
     return(0);
}

#endif /* ifndef WIN32 */

/* Main agent: collects URLs given with -i -u and, in builds without
   HAVE_PTHREAD, is the agent that thread_main() runs with */
UDM_AGENT Main;

/*
  Load the configuration file fname into Indexer->Conf with the given load
  flags and make sure it declared at least one database.

  Returns the UdmEnvLoad() result when loading fails, UDM_ERROR (with a
  message in Indexer->Conf->errstr) when no DBAddr command was given,
  UDM_OK otherwise.
*/
static int UdmIndexerEnvLoad(UDM_AGENT *Indexer, const char *fname,int lflags){
     int rc= UdmEnvLoad(Indexer, fname, lflags);

     if (rc != UDM_OK)
          return rc;

     if (Indexer->Conf->dbl.nitems != 0)
          return rc;

     sprintf(Indexer->Conf->errstr, "Error: '%s': No required DBAddr commands were specified", fname);
     return UDM_ERROR;
}

static enum udm_indcmd UdmIndCmd(const char *cmd) {
  if (!cmd)return UDM_IND_INDEX;
  if (!strncasecmp(cmd,"ind",3))return UDM_IND_INDEX;
  if (!strncasecmp(cmd,"sta",3))return UDM_IND_STAT;
  if (!strncasecmp(cmd,"cre",3))return UDM_IND_CREATE;
  if (!strncasecmp(cmd,"dro",3))return UDM_IND_DROP;
  if (!strncasecmp(cmd,"del",3))return UDM_IND_DELETE;
  if (!strncasecmp(cmd,"ref",3))return UDM_IND_REFERERS;
  if (!strncasecmp(cmd,"sql",3))return UDM_IND_SQLMON;
  if (!strncasecmp(cmd,"che",3))return UDM_IND_CHECKCONF;
  if (!strncasecmp(cmd,"blo",3))return UDM_IND_MULTI2BLOB;
  return UDM_IND_INDEX;
}

/*
  Parse command line
*/
static int UdmARGC;     /* Copy of main()'s argc, read by UdmParseCmdLine() */
static char **UdmARGV;  /* Copy of main()'s argv, read by UdmParseCmdLine() */
static enum udm_indcmd cmd = UDM_IND_INDEX; /* Selected command; changed by -C, -S, -I, -Q and -E */
/* Option results: -i, -a, -R (counted), -M, -b (counted), -h/-?/unknown option (counted) */
static int insert = 0, expire = 0, pop_rank = 0, mkind = 0, block = 0, help = 0;
static char *url_filename=NULL; /* Argument of the -f option, NULL when not given */

/*
  Parse the options in UdmARGC/UdmARGV with getopt() and store the results
  in the file-level option variables, in Conf (url_number and the
  tag/cat/status/type/lang/u/stat_time variables) and in Main.Hrefs.

  Notes:
    - -F prints the matching build features and exits the process.
    - Options are processed in command-line order: -u adds the URL to
      Main.Hrefs only if -i has already been seen.
    - The option string also accepts U, d, A: and D:, which have no case
      below; they end up in the default branch and increment help.
    - NOTE(review): getopt() keeps its position between calls, so calling
      this function a second time (as the config reload in thread_main()
      does) presumably parses nothing unless optind is reset - confirm.
*/
static void UdmParseCmdLine(void) {
  int ch;

     while ((ch = getopt(UdmARGC, UdmARGV, "QUCSIRMabheorldmqiw?E:F:t:u:s:n:v:L:A:D:p:N:f:c:g:y:j:")) != -1){
          switch (ch) {
          case 'F': {
               UDM_VARLIST V,W;
               size_t i;
               
               UdmVarListInit(&V);
               UdmVarListInit(&W);
               UdmFeatures(&V);
               /* Copy the feature variables selected by the -F argument into W */
               UdmVarListAddLst(&W,&V,NULL,optarg);
               for(i=0;i<W.nvars;i++)
                    printf("%s:%s\n",W.Var[i].name,W.Var[i].val);
               exit(0);
          }
          /* Commands that do not index: skip loading servers, langmaps and spell data */
          case 'C': cmd= UDM_IND_DELETE;  add_servers=0;load_langmaps=0;load_spells=0;break;
          case 'S': cmd= UDM_IND_STAT;    add_servers=0;load_langmaps=0;load_spells=0;break;
          case 'I': cmd= UDM_IND_REFERERS;add_servers=0;load_langmaps=0;load_spells=0;break;
          case 'Q': cmd= UDM_IND_SQLMON;  add_servers=0;load_langmaps=0;load_spells=0;break;
          case 'E': cmd= UdmIndCmd(optarg);break;
          case 'R': pop_rank++; break;
          case 'M': mkind=1;break;
          case 'q': add_server_urls = 0; break;
          case 'l': log2stderr=0;break;
          case 'a': expire=1;break;
          case 'b': block++;break;
          case 'e': flags|=UDM_FLAG_SORT_EXPIRED;break;
          case 'o': flags|=UDM_FLAG_SORT_HOPS;break;
          case 'r': flags|=UDM_FLAG_DONTSORT_SEED; break;
          case 'm': flags|=UDM_FLAG_REINDEX;break;
          /* Numeric arguments are converted with atoi(); malformed input is not reported */
          case 'n': Conf.url_number=atoi(optarg);break;
          case 'c': max_index_time=atoi(optarg);break;
          case 'v': UdmSetLogLevel(NULL, atoi(optarg)); break;
          case 'p': seconds=atoi(optarg);break;
          /* Subsection limits are stored as variables in Conf.Vars */
          case 't': UdmVarListAddStr(&Conf.Vars,"tag" , optarg);break;
          case 'g': UdmVarListAddStr(&Conf.Vars,"cat" , optarg);break;
          case 's': UdmVarListAddStr(&Conf.Vars, "status", optarg);break;
          case 'y': UdmVarListAddStr(&Conf.Vars,"type", optarg);break;
          case 'L': UdmVarListAddStr(&Conf.Vars,"lang", optarg);break;
          case 'u': UdmVarListAddStr(&Conf.Vars,"u"   , optarg);
               /* With -i already given, the URL is also queued for insertion */
               if(insert){
                    UDM_HREF Href;
                    UdmHrefInit(&Href);
                    Href.url=optarg;
                    Href.method=UDM_METHOD_GET;
                    UdmHrefListAdd(&Main.Hrefs, &Href);
               }
               break;
          case 'N': maxthreads=atoi(optarg);break;
          case 'f': url_filename=optarg;break;
          case 'i': insert=1;break;
          case 'w': warnings=0;break;
	  case 'j': UdmVarListAddStr(&Conf.Vars, "stat_time", optarg); break;
          /* Help requests and unrecognized options: the caller prints usage */
          case '?':
          case 'h':
          default:
               help++;
          }
     }

}





/*
  Body of one indexer thread (also called directly in builds without
  HAVE_PTHREAD).

  arg is the UDM_AGENT of this thread. The loop:
    1. stops once max_index_time (when >= 0) seconds have passed since
       Indexer->start_time;
    2. under UDM_LOCK_THREAD, reacts to the pending signal flags: config
       reload (have_sighup), termination of all agents (have_sigint /
       have_sigterm), log level up/down (have_sigusr1 / have_sigusr2);
    3. indexes the next URL with UdmIndexNextURL() while the previous result
       was UDM_OK or UDM_NOTARGET;
    4. maintains sleep_threads: a thread with nothing to do (or in an error /
       terminated state) counts as sleeping, and the loop finishes once
       sleep_threads >= total_threads; without HAVE_PTHREAD any result other
       than UDM_OK finishes the loop at once;
    5. sleeps "seconds" between documents and 60 seconds while idle.

  On exit the caches are flushed and a summary is logged (unless the last
  result was UDM_ERROR), the thread is removed from ThreadIndexers,
  total_threads is decremented and the agent is freed.

  Returns 0 (WIN32) or NULL.
*/
#ifdef  WIN32
unsigned int __stdcall thread_main(void *arg) {
	char *str_buf;
#else
static void * thread_main(void *arg){
#endif
     UDM_AGENT * Indexer = (UDM_AGENT *)arg;
     int res=UDM_OK;
     int done=0;
     int i_sleep=0;     /* non-zero while this thread is counted in sleep_threads */
     
     while(!done){
     
          if(max_index_time>=0){
               time_t now;
               
               time(&now);
               if((now-Indexer->start_time)>max_index_time)
                    break;
          }

	  UDM_GETLOCK(Indexer, UDM_LOCK_THREAD);
          if(have_sighup){
               UDM_ENV   NewConf;
               int  rc;
               
               UdmLog(Indexer,UDM_LOG_ERROR,"Reloading config '%s'",cname);

               UdmEnvInit(&NewConf);
               UdmSetLockProc(&NewConf,UdmLockProc);
               UdmSetRefProc(&NewConf,UdmRefProc);

	       UDM_GETLOCK(Indexer, UDM_LOCK_CONF);
               /* Load into the temporary environment, then point the agent back at Conf */
               /* NOTE(review): main() loads with load_spells and add_server_urls,
                  this reload uses UDM_FLAG_SPELL and no add_server_urls - confirm
                  the difference is intended */
               Indexer->Conf = &NewConf;
               rc = UdmIndexerEnvLoad(Indexer, cname, add_servers + load_langmaps + UDM_FLAG_SPELL);
               Indexer->Conf = &Conf;
               
               if(rc!=UDM_OK){
		    UDM_RELEASELOCK(Indexer, UDM_LOCK_CONF);
                    UdmLog(Indexer,UDM_LOG_ERROR,"Can't load config: %s",UdmEnvErrMsg(&NewConf));
                    UdmLog(Indexer,UDM_LOG_ERROR,"Continuing with old config");
                    UdmEnvFree(&NewConf);
               }else{
                    /* Replace the shared environment by the freshly loaded one */
                    UdmEnvFree(&Conf);
                    Conf=NewConf;
		    UdmParseCmdLine();
#ifdef WIN32
#else
                    UdmOpenLog("indexer", &Conf, log2stderr);
#endif
		    UDM_RELEASELOCK(Indexer, UDM_LOCK_CONF);
              }

               have_sighup=0;
          }
	  if (have_sigint || have_sigterm) {
	    int z;
            UdmLog(Indexer, UDM_LOG_ERROR, "%s received. Terminating. Please wait...", (have_sigint) ? "SIGINT" : "SIGTERM");
	    /* Ask every registered agent, and the main one, to terminate */
	    for (z = 0 ; z < total_threads; z++)
	      if (ThreadIndexers[z]) UdmAgentSetAction(ThreadIndexers[z], UDM_TERMINATED);
	    UdmAgentSetAction(&Main, UDM_TERMINATED);
	    have_sigint = have_sigterm = 0;
	  }

	  if (have_sigusr1) {
	    UdmIncLogLevel(Indexer);
	    have_sigusr1 = 0;
	  }
	  if (have_sigusr2) {
	    UdmDecLogLevel(Indexer);
	    have_sigusr2 = 0;
	  }
	  UDM_RELEASELOCK(Indexer, UDM_LOCK_THREAD);

          if(done)break;
          
          if(res == UDM_OK || res == UDM_NOTARGET) { /* Possible after bad startup */
               res=UdmIndexNextURL(Indexer);
	  }
          
          UDM_GETLOCK(Indexer, UDM_LOCK_THREAD);
          cur_url_number++;
          UDM_RELEASELOCK(Indexer, UDM_LOCK_THREAD);

          switch(res){
               case UDM_OK:
                    /* Work was found again: this thread is no longer sleeping */
                    if(i_sleep){
                         UDM_GETLOCK(Indexer, UDM_LOCK_THREAD);
                         sleep_threads--;
                         UDM_RELEASELOCK(Indexer, UDM_LOCK_THREAD);
                         i_sleep=0;
                    }
                    break;

               
               case UDM_NOTARGET:
#ifdef HAVE_PTHREAD
               /* in multi-threaded environment we          */
               /* should wait for a moment when every thread     */
               /* has nothing to do                    */

		 UdmURLAction(Indexer, NULL, UDM_URL_ACTION_FLUSH); /* flush DocCache */

                    if(!i_sleep){
                         UDM_GETLOCK(Indexer, UDM_LOCK_THREAD);
                         sleep_threads++;
                         UDM_RELEASELOCK(Indexer, UDM_LOCK_THREAD);
                         i_sleep=1;
                    }

                    UDM_GETLOCK(Indexer, UDM_LOCK_THREAD);
                    done=(sleep_threads>=total_threads);
                    UDM_RELEASELOCK(Indexer, UDM_LOCK_THREAD);

                    break;
#else
                    done=1;
                    break;
#endif
               case UDM_ERROR:
#ifdef WIN32
			/* NOTE(review): the first call passes a null agent and state - confirm
			   that UdmShowInfo() accepts them */
			str_buf = (char *)UdmMalloc(1024);
			udm_snprintf(str_buf, 1024, "Error: %s",  UdmEnvErrMsg(Indexer->Conf));
		    UdmShowInfo(0,NULL, str_buf);
			UDM_FREE(str_buf);
		    UdmShowInfo(Indexer, "Error", UdmEnvErrMsg(Indexer->Conf));
#endif
		    /* fall through */
	       case UDM_TERMINATED:
#ifdef WIN32
		    UdmShowInfo(Indexer, "Aborted", "");
#endif
		    /* fall through */

               default:
#ifdef HAVE_PTHREAD
               /* in multi-threaded environment we          */
               /* should wait for a moment when every thread     */
               /* has nothing to do                    */

                    if(!i_sleep){
		         if (res == UDM_ERROR) UdmLog(Indexer,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(Indexer->Conf));
                         UDM_GETLOCK(Indexer, UDM_LOCK_THREAD);
                         sleep_threads++;
                         UDM_RELEASELOCK(Indexer, UDM_LOCK_THREAD);
                         i_sleep=1;
                    }

                    UDM_GETLOCK(Indexer, UDM_LOCK_THREAD);
                    done=(sleep_threads>=total_threads);
                    UDM_RELEASELOCK(Indexer, UDM_LOCK_THREAD);

                    break;
#else
                    if (res == UDM_ERROR) UdmLog(Indexer,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(Indexer->Conf));
                    done=1;
#endif
                    break;
          }
          if((seconds)&&(!done)){
               UdmLog(Indexer,UDM_LOG_DEBUG,"Sleeping %d second(s)",seconds);
#ifndef WIN32
               /* Count only the time actually slept (UDMSLEEP returns the remainder) */
               Indexer->nsleepsecs += seconds - UDMSLEEP(seconds);
#else
			   Indexer->nsleepsecs += seconds;
			   UDMSLEEP(seconds);
#endif
          }
          if ((i_sleep) && (!done)) {
                  UdmLog(Indexer, UDM_LOG_ERROR, "%s, sleeping %d seconds", 
                      (res == UDM_NOTARGET) ? "No targets" : ((res == UDM_TERMINATED) ? "Terminating" : "An error occured"), 60);
#ifndef WIN32
               Indexer->nsleepsecs += 60 - UDMSLEEP(60);
#else
			   Indexer->nsleepsecs += 60;
			   UDMSLEEP(60);
#endif
          }
     }

     if(res!=UDM_ERROR){
          time_t now, sec;
          float M = 0.0, K = 0.0;
	  int z;

          UdmURLAction(Indexer, NULL, UDM_URL_ACTION_FLUSH); /* flush DocCache */
	  UdmMultiCacheFlush(Indexer);

          time(&now);
          /* Working time: elapsed time minus the time spent sleeping */
          sec = now - Indexer->start_time - Indexer->nsleepsecs;
          if (sec > 0) {
               M = Indexer->nbytes / 1048576.0 / sec;
               if (M < 1.0) K = Indexer->nbytes / 1024.0 / sec;
          }
          /* time_t need not be int-sized: convert it to match the %d conversion */
          UdmLog(Indexer,UDM_LOG_ERROR,"Done (%d seconds, %u documents, %u bytes, %5.2f %cbytes/sec.)",
               (int)sec, Indexer->ndocs, Indexer->nbytes, (M < 1.0) ? K : M, (M < 1.0) ? 'K' : 'M' );
#if !defined(WIN32) && defined(HAVE_PTHREAD)
		  for (z = 0 ; z < total_threads; z++)
	      if (ThreadIndexers[z])  pthread_kill(threads[z], SIGALRM); /* wake-up sleeping threads */
#endif
     }
     /* Unregister this thread before releasing its agent */
     UDM_GETLOCK(Indexer, UDM_LOCK_THREAD);
     total_threads--;
     ThreadIndexers[Indexer->handle - 1] = NULL;
     UDM_RELEASELOCK(Indexer, UDM_LOCK_THREAD);

     UdmAgentFree(Indexer);
     
#ifdef WIN32
     return(0);
#else
     return(NULL);
#endif
}


static char pidname[1024]; /* Path of the pid file; removed by exitproc() */
static char time_pid[100]; /* Buffer filled and returned by time_pid_info() */

/* Exit handler: removes the pid file named by pidname; the result is not checked */
static void exitproc(void){
     (void) unlink(pidname);
}

#ifndef WIN32

/*
  Format "<weekday> <day> <HH:MM:SS> [<pid>]" for the current local time and
  process into the static buffer time_pid and return it.

  The returned pointer refers to shared static storage, so the text is
  overwritten by the next call.
*/
static char * time_pid_info(void){
     struct tm tim;
     time_t t=time(NULL);
     size_t len=0;

     time_pid[0]='\0';
     /* localtime_r() avoids the shared static result of localtime() */
     if(localtime_r(&t,&tim))
          len=strftime(time_pid,sizeof(time_pid),"%a %d %T",&tim);
     /* len is 0 when the time part is unavailable; the write stays inside the buffer */
     snprintf(time_pid+len,sizeof(time_pid)-len," [%d]",(int)getpid());
     return(time_pid);
}

#endif

/*
  Initialize Winsock (version 1.1) on WIN32; does nothing elsewhere.
  On failure the error code returned by WSAStartup() is reported to stderr
  and the process exits with status 1.
*/
static void UdmWSAStartup(void){
#ifdef WIN32
     WSADATA wsaData;
     /* WSAStartup() returns the error code itself */
     int rc=WSAStartup(0x101,&wsaData);
     if(rc!=0){
          fprintf(stderr,"WSAStartup() error %d\n",rc);
          exit(1);
     }
#endif
}

/* Release Winsock on WIN32; does nothing elsewhere */
static void UdmWSACleanup(void){
#ifdef WIN32
     WSACleanup();
#endif
}

/*
  Print msg to stdout and read an answer from stdin.

  Returns 1 only when the answer is exactly "YES" (upper case), i.e. "YES"
  followed by the end of the line or of the input; returns 0 for any other
  answer, including longer words that merely start with "YES", and on end
  of input.
*/
static int UdmConfirm(const char *msg)
{
        char str[5];
        printf("%s",msg);
        /* The prompt has no newline: flush so it is visible before blocking on input */
        fflush(stdout);
        if (!fgets(str,sizeof(str),stdin))
                return 0;
        if (strncmp(str,"YES",3))
                return 0;
        /* Reject e.g. "YESTERDAY": only a line terminator may follow */
        return (str[3] == '\0' || str[3] == '\n' || str[3] == '\r');
}

/*
  Clear documents from the database(s).

  Unless warnings are disabled (-w), the DBAddr of every configured database
  is listed and the user must confirm. After confirmation either the URLs
  listed in url_fname are cleared (when url_fname is not NULL) or the whole
  database is cleared.

  Returns UDM_ERROR only when UdmClearDatabase() fails. A failure of
  UdmURLFile() is logged but still yields UDM_OK, as does a cancelled run.
*/
static int UdmClear(UDM_AGENT *A, const char *url_fname)
{
     int confirmed=1;

     if(warnings){
          size_t dbnum;

          printf("You are going to delete content from database(s):\n");
          for(dbnum=0; dbnum<Conf.dbl.nitems; dbnum++)
               printf("%s\n", UdmVarListFindStr(&Conf.dbl.db[dbnum].Vars,"DBAddr","<noaddr>"));
          confirmed=UdmConfirm("Are you sure?(YES/no)");
     }

     if(!confirmed){
          printf("Canceled\n");
          return UDM_OK;
     }

     if(url_fname){
          /* Clear only the URLs listed in the file */
          if(UdmURLFile(A,url_fname,UDM_URL_FILE_CLEAR)!=UDM_OK)
               UdmLog(A,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(A->Conf));
          return UDM_OK;
     }

     printf("Deleting...");
     if(UdmClearDatabase(A)!=UDM_OK)
          return UDM_ERROR;
     printf("Done\n");
     return UDM_OK;
}


/*
  Run the indexing phase.

  With HAVE_PTHREAD, maxthreads indexer agents are created and each is run
  by thread_main() in its own thread; the function returns once all started
  threads have finished. Thread creation is spaced by "seconds" when that is
  non-zero. Without HAVE_PTHREAD, thread_main() is called directly with the
  global Main agent.

  Returns UDM_ERROR when the bookkeeping arrays cannot be allocated,
  UDM_OK otherwise. A thread that cannot be started is logged and no further
  threads are created; the threads already running are still waited for.
*/
static int UdmIndex(UDM_AGENT *A) {
     /* Always keep at least one slot: the non-threaded path uses slot 0 */
     size_t nslots = (maxthreads > 0) ? (size_t) maxthreads : 1;

     ThreadIndexers = (UDM_AGENT**)UdmMalloc(nslots * sizeof(UDM_AGENT*));
     if (!ThreadIndexers) {
          UdmLog(A, UDM_LOG_ERROR, "Can't allocate memory for indexer threads");
          return UDM_ERROR;
     }
     /* thread_main() tests the slots for NULL, so start from a clean table */
     memset(ThreadIndexers, 0, nslots * sizeof(UDM_AGENT*));
               
#ifdef HAVE_PTHREAD
     {
          int i;
#ifndef WIN32
          int started = 0;     /* number of threads that must be joined */
#endif

#ifdef WIN32
          threads = (HANDLE*)UdmMalloc(nslots * sizeof(HANDLE));
#else
          threads = (pthread_t*)UdmMalloc(nslots * sizeof(pthread_t));
#endif
          if (!threads) {
               UdmLog(A, UDM_LOG_ERROR, "Can't allocate memory for indexer threads");
               UDM_FREE(ThreadIndexers);
               return UDM_ERROR;
          }
               
          for(i=0;i<maxthreads;i++){
	    UDM_AGENT *Indexer;
	    int create_failed;

	    if (seconds) UDMSLEEP(seconds);

	    /* Create the agent and register it at index (handle - 1) */
	    UDM_GETLOCK(A, UDM_LOCK_THREAD);
	    UDM_GETLOCK(A, UDM_LOCK_CONF);
	    Indexer = UdmAgentInit(NULL, A->Conf, next_thread++);
	    Indexer->flags = flags;
	    ThreadIndexers[Indexer->handle - 1] = Indexer;
	    UDM_RELEASELOCK(A, UDM_LOCK_CONF);
	    UDM_RELEASELOCK(A, UDM_LOCK_THREAD);

#ifdef WIN32
	    /* The agent is the thread argument; _beginthreadex() returns 0 on failure */
	    threads[i] = (HANDLE)_beginthreadex(NULL, 0, &thread_main, Indexer, 0, NULL);
	    create_failed = (threads[i] == 0);
#else
               {
                    pthread_attr_t attr;
                    size_t stksize = 1024 * 512;
                    
                    pthread_attr_init(&attr);
                    pthread_attr_setstacksize(&attr, stksize);
                    create_failed = (pthread_create(&threads[i], &attr, &thread_main, Indexer) != 0);
                    pthread_attr_destroy(&attr);
               }
#endif
	    if (create_failed) {
	         /* No thread owns this agent: unregister and release it here */
	         UdmLog(A, UDM_LOG_ERROR, "Can't start indexer thread %d", i + 1);
	         UDM_GETLOCK(A, UDM_LOCK_THREAD);
	         ThreadIndexers[Indexer->handle - 1] = NULL;
	         UDM_RELEASELOCK(A, UDM_LOCK_THREAD);
	         UdmAgentFree(Indexer);
	         break;
	    }
#ifndef WIN32
	    started++;
#endif
	       UDM_GETLOCK(A, UDM_LOCK_THREAD);
	       total_threads = i + 1;
	       UDM_RELEASELOCK(A,UDM_LOCK_THREAD);
          }
#ifndef WIN32        
	  /* Join only the threads that were really created */
	  for (i = 0; i < started; i++) pthread_join(threads[i], NULL);
#else
          /* Poll until every thread has unregistered itself */
          while(1){
               int num;
               UDM_GETLOCK(A,UDM_LOCK_THREAD);
               num=total_threads;
               UDM_RELEASELOCK(A,UDM_LOCK_THREAD);
               if(!num)break;
               UDMSLEEP(1);
          }
#endif
          UDM_FREE(threads);
     }
#else
     Main.handle = 1;
     thread_main(&Main);
#endif

     UDM_FREE(ThreadIndexers);

     return UDM_OK;
}

#ifndef WIN32

int main(int argc, char **argv) {
     char      *language=NULL,*affix=NULL,*dictionary=NULL, *env;
     int       pid_fd;
     char      pidbuf[1024];
#ifdef CHASEN
     char            *chasen_argv[] = { "chasen", "-b", "-f", "-F", "%m ", NULL };
     chasen_getopt_argv(chasen_argv, NULL);
#endif
     
     UdmWSAStartup();
     
     UdmInit(); /* Initialize library */
     
     UdmInitMutexes();
     UdmEnvInit(&Conf);
     UdmSetLockProc(&Conf,UdmLockProc);
     UdmSetRefProc(&Conf,UdmRefProc);
#ifdef THINFO_TEST
     UdmSetThreadProc(&Conf,UdmShowInfo);
#endif
     UdmAgentInit(&Main,&Conf,0);
     
     UdmARGC = argc;
     UdmARGV = argv;

     UdmParseCmdLine();
     
     if (cmd != UDM_IND_INDEX)
     {
       add_servers=0;
       load_langmaps=0;
       load_spells=0;
     }
     
     flags|=add_servers;
     flags |= add_server_urls;
     Main.flags = flags;

     argc -= optind;argv += optind;

     if((argc>1) || (help)){
          usage(help);
          UdmEnvFree(&Conf);
          return(1);
     }
     
     env=getenv("UDM_CONF_DIR");
     UdmVarListReplaceStr(&Conf.Vars,"ConfDir",env?env:UDM_CONF_DIR);
     
     env=getenv("UDM_SHARE_DIR");
     UdmVarListReplaceStr(&Conf.Vars,"ShareDir",env?env:UDM_SHARE_DIR);
     
     if(argc==1){
          strncpy(cname,argv[0],sizeof(cname));
          cname[sizeof(cname)-1]='\0';
     }else{
          const char *cd=UdmVarListFindStr(&Conf.Vars,"UDM_CONF_DIR",UDM_CONF_DIR);
          udm_snprintf(cname,sizeof(cname),"%s%s%s",cd,UDMSLASHSTR,"indexer.conf");
          cname[sizeof(cname)-1]='\0';
     }
     
     if(UDM_OK!=UdmIndexerEnvLoad(&Main, cname, add_servers + load_langmaps + load_spells + add_server_urls)) {
          fprintf(stderr,"%s\n",UdmEnvErrMsg(&Conf));
          UdmEnvFree(&Conf);
          exit(1);
     }
     
     if (cmd==UDM_IND_CHECKCONF){
          exit(0);
     }
     
#ifdef WIN32
#else
     UdmOpenLog("indexer",&Conf, log2stderr);
     UdmSigHandlersInit(&Main);
#endif
     
     if (cmd==UDM_IND_SQLMON){
          UDM_SQLMON_PARAM prm;
          bzero((void*)&prm,sizeof(prm));
          prm.infile= stdin;
          prm.outfile= stdout;
          prm.flags= UDM_SQLMON_DISPLAY_FIELDS;
          prm.gets= sqlmongets;
          prm.display= UdmDisplaySQLQuery;
          prm.prompt= sqlmonprompt;
          UdmSQLMonitor(&Main, &Conf, &prm);
          exit(0);
     }

     if (cmd == UDM_IND_MULTI2BLOB) {
         UdmMulti2Blob(&Main);
         exit(0);
     }
     
     if(url_filename && strcmp(url_filename,"-")) {
          /* Make sure URL file is readable if not STDIN */
          FILE *url_file;
          if(!(url_file=fopen(url_filename,"r"))){
               UdmLog(&Main,UDM_LOG_ERROR,"Error: can't open url file '%s': %s",url_filename, strerror(errno));
               goto ex;
          }
          fclose(url_file);
     }
     
     if(insert && url_filename) {
          
          if(strcmp(url_filename,"-")){
               /* Make sure all URLs to be inserted are OK */
               if(UDM_OK!=UdmURLFile(&Main, url_filename,UDM_URL_FILE_PARSE)){
                    UdmLog(&Main,UDM_LOG_ERROR,"Error: Invalid URL in '%s'",url_filename);
                    goto ex;
               }
          }
          
          if(UDM_OK!=UdmURLFile(&Main,url_filename,UDM_URL_FILE_INSERT)){
               UdmLog(&Main,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(Main.Conf));
               goto ex;
          }
     }
     
     if(expire){
          int  res;
          
          if(url_filename){
               res=UdmURLFile(&Main,url_filename,UDM_URL_FILE_REINDEX);
          }else{
               res = UdmURLAction(&Main, NULL, UDM_URL_ACTION_EXPIRE);
          }
          if(res!=UDM_OK){
               UdmLog(&Main,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(Main.Conf));
               goto ex;
          }
     }
     
     if(affix||dictionary){
          if(!language){
               UdmLog(&Main,UDM_LOG_ERROR,"Error: Language is not specified for import!");
          }else
          if(strlen(language)!=2){
               UdmLog(&Main,UDM_LOG_ERROR,"Error: Language should be 2 letters!");
          }
          goto ex;
     }

     switch(cmd){
          case UDM_IND_DELETE:
               if(UDM_OK!=UdmClear(&Main,url_filename)){
                    UdmLog(&Main,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(Main.Conf));
               }
               break;
          case UDM_IND_STAT:
               if(UDM_OK!=ShowStatistics(&Main)){
                    UdmLog(&Main,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(Main.Conf));
               }
               break;
          case UDM_IND_REFERERS:
               if(UDM_OK!=ShowReferers(&Main)){
                    UdmLog(&Main,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(Main.Conf));
               }
               break;
          case UDM_IND_CREATE:
          case UDM_IND_DROP:
               if(UDM_OK!=CreateOrDrop(&Main,cmd)){
                    UdmLog(&Main,UDM_LOG_ERROR,"Error: '%s'",UdmEnvErrMsg(Main.Conf));
               }
               break;
          default:
          {
               if (block) {
                    /* Check that another instance isn't running */
                    /* and create PID file.                      */
                    
                    sprintf(pidname,"%s/%s",UDM_VAR_DIR,"indexer.pid");
                    pid_fd = open(pidname,O_CREAT|O_EXCL|O_WRONLY,0644);
                    if(pid_fd < 0){
                         fprintf(stderr,"%s Can't create '%s': %s\n", time_pid_info(), pidname, strerror(errno));
                         if(errno == EEXIST){
                              fprintf(stderr,"It seems that another indexer is already running!\n");
                              fprintf(stderr,"Remove '%s' if it is not true.\n",pidname);
                         }
                         goto ex;
                    }
                    sprintf(pidbuf,"%d\n",(int)getpid());
                    write(pid_fd,&pidbuf,strlen(pidbuf));
#ifdef HAVE_ATEXIT
                    atexit(&exitproc);
#endif
               }
               UdmLog(&Main,UDM_LOG_ERROR, "indexer from %s-%s-%s started with '%s'", PACKAGE, VERSION, UDM_DBTYPE, cname);
               UdmStoreHrefs(&Main);    /**< store hrefs from config and command line */
               UdmIndex(&Main);
          }
     }
     
#ifdef HAVE_SQL
     if (pop_rank) {
       UdmSrvAction(&Main, NULL, UDM_SRV_ACTION_POPRANK);
     }
#endif
     
ex:
     total_threads=0;
     UdmAgentFree(&Main);
     UdmEnvFree(&Conf);
     UdmDestroyMutexes();
     UdmWSACleanup();
#ifndef HAVE_ATEXIT
     exitproc();
#endif
     return(0);
}

#endif /* ifndef WIN32 */
