/*************************************************************************************************
 * The search helper for the node master
 *                                                      Copyright (C) 2004-2006 Mikio Hirabayashi
 * This file is part of Hyper Estraier.
 * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License or any later version.  Hyper Estraier is distributed in the hope
 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 * You should have received a copy of the GNU Lesser General Public License along with Hyper
 * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307 USA.
 *************************************************************************************************/


#include "mastermod.h"


/* global variables */
const char *g_progname;                  /* program name; assigned from argv[0] in main and
                                            printed by usage */


/* function prototypes */
int main(int argc, char **argv);
/* print the usage message to the standard error and exit with status 1 */
static void usage(void);
/* decode the command line of the `search' sub command and hand it to procsearch */
static int runsearch(int argc, char **argv);
/* run the search against the database and write the formatted result into `outfile';
   the return value is 0 on success or 1 on failure */
static int procsearch(const char *dbname, CBMAP *params, const char *outfile,
                      const char *myurl, const char *mylabel, int rateuri, int scoreexpr,
                      int searchmax, int wwidth, int hwidth, int awidth,
                      int scancheck, int smlrvnum);


/* main routine */
int main(int argc, char **argv){
  const char *tmp;
  int rv;
  if((tmp = getenv("ESTDBGFD")) != NULL) dpdbgfd = atoi(tmp);
  est_proc_env_reset();
  g_progname = argv[0];
  if(argc < 2) usage();
  rv = 0;
  if(!strcmp(argv[1], "search")){
    rv = runsearch(argc, argv);
  } else {
    usage();
  }
  return rv;
}


/* parse arguments of the search command
   The expected layout of `argv' is:
     argv[2]   path of the database
     argv[3]   encoded, serialized map of the search parameters (decoded by cbbasedecode and
               restored by cbmapload)
     argv[4]   path of the output file
     argv[5]   encoded URL of this node
     argv[6]   encoded label of this node
     argv[7]   rateuri flag
     argv[8]   score expression mode
     argv[9]   upper limit of the number of documents to show
     argv[10]  whole width of a snippet
     argv[11]  head width of a snippet
     argv[12]  around width of a snippet
     argv[13]  scancheck flag
     argv[14]  number of elements of a keyword vector
   If there are fewer arguments, or one of searchmax and the three widths is negative, the
   usage is printed and the process exits.
   The return value is the one of procsearch. */
static int runsearch(int argc, char **argv){
  CBMAP *params;
  const char *dbname, *outfile;
  char *buf, *myurl, *mylabel;
  int rv, size, rateuri, scoreexpr, searchmax, wwidth, hwidth, awidth, scancheck, smlrvnum;
  if(argc < 15) usage();
  dbname = argv[2];
  buf = cbbasedecode(argv[3], &size);
  params = cbmapload(buf, size);
  outfile = argv[4];
  /* the decoded strings are registered to the global garbage collector with `free', so
     they are not released explicitly in this function */
  myurl = cbbasedecode(argv[5], NULL);
  cbglobalgc(myurl, free);
  mylabel = cbbasedecode(argv[6], NULL);
  cbglobalgc(mylabel, free);
  rateuri = atoi(argv[7]);
  scoreexpr = atoi(argv[8]);
  searchmax = atoi(argv[9]);
  wwidth = atoi(argv[10]);
  hwidth = atoi(argv[11]);
  awidth = atoi(argv[12]);
  scancheck = atoi(argv[13]);
  smlrvnum = atoi(argv[14]);
  if(searchmax < 0 || wwidth < 0 || hwidth < 0 || awidth < 0) usage();
  /* pass `rateuri' and `scoreexpr' in the order declared by procsearch; they used to be
     passed crosswise, so that each setting was interpreted as the other one
     NOTE(review): confirm that the caller puts rateuri at argv[7] and scoreexpr at argv[8] */
  rv = procsearch(dbname, params, outfile, myurl, mylabel, rateuri, scoreexpr,
                  searchmax, wwidth, hwidth, awidth, scancheck, smlrvnum);
  cbmapclose(params);
  free(buf);
  return rv;
}


/* print the usage and exit
   The message goes to the standard error and the exit status is 1, so this function never
   returns to the caller. */
static void usage(void){
  fprintf(stderr, "%s: the search helper for the node master\n", g_progname);
  fputs("\n", stderr);
  fputs("usage:\n", stderr);
  fprintf(stderr, "  %s db args outfile\n", g_progname);
  fputs("\n", stderr);
  exit(1);
}


/* perform the search command
   `dbname' specifies the path of the database.  It is opened as a reader without locking.
   `params' specifies a map of the request parameters.  The keys read here are "skip",
   "phrase", "attr", "attr0" ... "attr<CONDATTRMAX>", "order", "max", "options", "auxiliary",
   "wwidth", "hwidth" and "awidth".
   `outfile' specifies the path of the file into which the whole result is written.
   `myurl' and `mylabel' specify the URL and the label of this node.  They are printed in the
   header and added to every document as the attributes DATTRNDURL and DATTRNDLABEL.
   `rateuri' specifies whether the score is adjusted according to the depth of the URI.  The
   adjustment is skipped if `scoreexpr' is SE_ASIS.
   `searchmax' specifies the upper limit of the number of shown documents.
   `wwidth', `hwidth' and `awidth' specify the default widths of a snippet.  They can be
   overridden by the parameters of the same names.  If `wwidth' is negative, the whole text
   of each document is printed instead of a snippet.  If it is zero, no body is printed.
   `scancheck' specifies whether each document is verified with est_db_scan_doc.
   `smlrvnum' specifies the number of elements of a keyword vector.  If it is not more than
   0, no vector is printed.
   The return value is 0 on success, or 1 if the database can not be opened or closed or the
   output file can not be written. */
static int procsearch(const char *dbname, CBMAP *params, const char *outfile,
                      const char *myurl, const char *mylabel, int rateuri, int scoreexpr,
                      int searchmax, int wwidth, int hwidth, int awidth,
                      int scancheck, int smlrvnum){
  ESTDB *db;
  ESTCOND *cond;
  ESTDOC *doc;
  CBMAP *hints, *kwords;
  const CBLIST *texts;
  CBLIST *words, *list;
  CBDATUM *datum;
  const char *bordstr, *tmp, *kbuf, *vbuf;
  char name[NUMBUFSIZ], *snippet;
  int i, j, ecode, err, max, skip, num, *res, rnum, hnum, ksiz, vsiz, snum, score, down, id;
  double curtime;
  if(!(db = est_db_open(dbname, ESTDBREADER | ESTDBNOLCK, &ecode))) return 1;
  err = FALSE;
  bordstr = est_border_str();
  /* build the search condition from the request parameters */
  cond = est_cond_new();
  max = DEFMAXSRCH;
  skip = 0;
  if((tmp = cbmapget(params, "skip", -1, NULL)) != NULL && (num = atoi(tmp)) > 0)
    skip = num;
  if((tmp = cbmapget(params, "phrase", -1, NULL)) != NULL && tmp[0] != '\0')
    est_cond_set_phrase(cond, tmp);
  if((tmp = cbmapget(params, "attr", -1, NULL)) != NULL && tmp[0] != '\0')
    est_cond_add_attr(cond, tmp);
  for(i = 0; i <= CONDATTRMAX; i++){
    num = sprintf(name, "attr%d", i);
    if((tmp = cbmapget(params, name, num, NULL)) != NULL && tmp[0] != '\0')
      est_cond_add_attr(cond, tmp);
  }
  if((tmp = cbmapget(params, "order", -1, NULL)) != NULL && tmp[0] != '\0')
    est_cond_set_order(cond, tmp);
  if((tmp = cbmapget(params, "max", -1, NULL)) != NULL && (num = atoi(tmp)) >= 0)
    max = num;
  /* the requested number is capped by the limit of the server */
  max = max > searchmax ? searchmax : max;
  if((tmp = cbmapget(params, "options", -1, NULL)) != NULL && (num = atoi(tmp)) > 0)
    est_cond_set_options(cond, num);
  if((tmp = cbmapget(params, "auxiliary", -1, NULL)) != NULL)
    est_cond_set_auxiliary(cond, atoi(tmp));
  /* unlike the other widths, a negative "wwidth" is accepted; it selects the full text */
  if((tmp = cbmapget(params, "wwidth", -1, NULL)) != NULL) wwidth = atoi(tmp);
  if((tmp = cbmapget(params, "hwidth", -1, NULL)) != NULL && (num = atoi(tmp)) >= 0)
    hwidth = num;
  if((tmp = cbmapget(params, "awidth", -1, NULL)) != NULL && (num = atoi(tmp)) >= 0)
    awidth = num;
  /* ask for the skipped documents, the shown documents, and one more */
  est_cond_set_max(cond, max + skip + 1);
  est_cond_set_options(cond, ESTCONDSCFB);
  hints = cbmapopenex(MINIBNUM);
  curtime = est_gettimeofday();
  res = est_db_search(db, cond, &rnum, hints);
  /* the hint of the empty key is used as the number of hits if it exists */
  hnum = (tmp = cbmapget(hints, "", 0, NULL)) ? atoi(tmp) : rnum;
  /* search again with the auxiliary setting of -1 if the first result is short
     NOTE(review): presumably -1 disables the auxiliary index -- confirm with the API */
  if(max >= 0 && hnum < max + 1 && est_cond_auxiliary_word(cond, "")){
    free(res);
    est_cond_set_auxiliary(cond, -1);
    res = est_db_search(db, cond, &rnum, hints);
    hnum = (tmp = cbmapget(hints, "", 0, NULL)) ? atoi(tmp) : rnum;
  }
  words = est_hints_to_words(hints);
  /* print the header part */
  datum = cbdatumopen(NULL, -1);
  est_datum_printf(datum, "%s\n", bordstr);
  est_datum_printf(datum, "VERSION\t%s\n", _EST_PROTVER);
  est_datum_printf(datum, "NODE\t%s\n", myurl);
  est_datum_printf(datum, "HIT\t%d\n", hnum);
  cbmapiterinit(hints);
  num = 1;
  while((kbuf = cbmapiternext(hints, &ksiz)) != NULL){
    /* the empty key is not a word; it was already reported as HIT */
    if(ksiz < 1) continue;
    est_datum_printf(datum, "HINT#%d\t%s\t%s\n", num, kbuf, cbmapget(hints, kbuf, ksiz, NULL));
    num++;
  }
  est_datum_printf(datum, "DOCNUM\t%d\n", est_db_doc_num(db));
  est_datum_printf(datum, "WORDNUM\t%d\n", est_db_word_num(db));
  curtime = est_gettimeofday() - curtime;
  est_datum_printf(datum, "TIME\t%.6f\n", curtime / 1000.0);
  est_datum_printf(datum, "TIME#i\t%.6f\n", curtime / 1000.0);
  est_datum_printf(datum, "TIME#0\t%.6f\n", curtime / 1000.0);
  est_datum_printf(datum, "LINK#0\t%s\t", myurl);
  est_datum_printf(datum, "%s\t%d\t%d\t%d\t%.0f\t%d\n", mylabel, SELFCREDIT,
                   est_db_doc_num(db), est_db_word_num(db), est_db_size(db), hnum);
  est_datum_printf(datum, "VIEW\tSNIPPET\n");
  est_datum_printf(datum, "\n");
  /* print the document parts; `snum' counts the printed documents only */
  snum = 0;
  for(i = 0; i < rnum && snum < max; i++){
    if(!(doc = est_db_get_doc(db, res[i], 0))) continue;
    if(scancheck && !est_db_scan_doc(db, doc, cond)){
      est_doc_delete(doc);
      continue;
    }
    /* skipped documents are counted apart from the printed ones; counting both with `snum'
       used to shrink the output to `max' minus `skip' documents and made it empty when
       `skip' was not less than `max' */
    if(skip > 0){
      skip--;
      est_doc_delete(doc);
      continue;
    }
    est_datum_printf(datum, "%s\n", bordstr);
    score = est_cond_score(cond, i);
    if(rateuri && scoreexpr != SE_ASIS){
      if((vbuf = est_doc_attr(doc, ESTDATTRURI)) != NULL){
        if(score < 100) score = 100;
        /* `down' starts from 4, gains 3 for an unknown scheme, 1 for every slash followed
           by another character, and 1 for a query or a fragment; the score is multiplied
           by 8 / down, so that it is raised below 8 and lowered above 8 */
        down = 4;
        if(cbstrfwimatch(vbuf, "file://")){
          vbuf += 7;
        } else if(cbstrfwimatch(vbuf, "ftp://")){
          vbuf += 6;
        } else if(cbstrfwimatch(vbuf, "http://")){
          vbuf += 7;
        } else if(cbstrfwimatch(vbuf, "https://")){
          vbuf += 8;
        } else {
          down += 3;
        }
        while(vbuf[0] != '\0'){
          if(vbuf[0] == '?' || vbuf[0] == '#'){
            down++;
            break;
          }
          if(vbuf[0] == '/' && vbuf[1] != '\0') down++;
          vbuf++;
        }
        score *= 8.0 / (double)down;
      } else {
        score = 0;
      }
    }
    est_doc_add_attr(doc, DATTRNDURL, myurl);
    est_doc_add_attr(doc, DATTRNDLABEL, mylabel);
    if(score >= 0){
      sprintf(name, "%d", score);
      est_doc_add_attr(doc, DATTRNDSCORE, name);
    }
    /* print every attribute as a line of "name=value" */
    list = est_doc_attr_names(doc);
    for(j = 0; j < cblistnum(list); j++){
      vbuf = cblistval(list, j, NULL);
      est_datum_printf(datum, "%s=%s\n", vbuf, est_doc_attr(doc, vbuf));
    }
    cblistclose(list);
    if(smlrvnum > 0){
      /* print the keyword vector as tab separated pairs; the stored keywords are preferred
         and they are extracted from the document only if there is none */
      est_datum_printf(datum, "%s", ESTDCNTLVECTOR);
      id = est_doc_id(doc);
      kwords = id > 0 ? est_db_get_keywords(db, id) : NULL;
      if(!kwords) kwords = est_db_etch_doc(db, doc, smlrvnum);
      cbmapiterinit(kwords);
      while((kbuf = cbmapiternext(kwords, &ksiz)) != NULL){
        cbdatumcat(datum, "\t", 1);
        cbdatumcat(datum, kbuf, ksiz);
        cbdatumcat(datum, "\t", 1);
        vbuf = cbmapget(kwords, kbuf, ksiz, &vsiz);
        cbdatumcat(datum, vbuf, vsiz);
      }
      cbmapclose(kwords);
      cbdatumcat(datum, "\n", 1);
    }
    /* an empty line separates the attributes from the body */
    est_datum_printf(datum, "\n");
    if(wwidth < 0){
      /* whole text: every text line, then the hidden texts led by a tab if not empty */
      texts = est_doc_texts(doc);
      for(j = 0; j < cblistnum(texts); j++){
        vbuf = cblistval(texts, j, &vsiz);
        cbdatumcat(datum, vbuf, vsiz);
        cbdatumcat(datum, "\n", 1);
      }
      vbuf = est_doc_hidden_texts(doc);
      if(vbuf[0] != '\0')  est_datum_printf(datum, "\t%s\n", vbuf);
    } else if(wwidth > 0){
      snippet = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
      cbdatumcat(datum, snippet, -1);
      free(snippet);
    }
    est_doc_delete(doc);
    snum++;
  }
  est_datum_printf(datum, "%s:END\n", bordstr);
  /* write the whole result at once and release everything */
  if(!cbwritefile(outfile, cbdatumptr(datum), cbdatumsize(datum))) err = TRUE;
  cbdatumclose(datum);
  cblistclose(words);
  free(res);
  cbmapclose(hints);
  est_cond_delete(cond);
  if(!est_db_close(db, &ecode)) err = TRUE;
  return err ? 1 : 0;
}



/* END OF FILE */
