/*
                                  NETWOX
                             Network toolbox
                Copyright(c) 1999-2005 Laurent Constantin
                                  -----

  Main server    : http://www.laurentconstantin.com/
  Backup servers : http://go.to/laurentconstantin/
                   http://laurentconstantin.est-la.com/
                   http://laurentconstantin.free.fr/
                   http://membres.lycos.fr/lauconstantin/
  [my current email address is on the web servers]

                                  -----
  This file is part of Netwox.

  Netwox is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License
  version 2 as published by the Free Software Foundation.

  Netwox is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details (http://www.gnu.org/).

------------------------------------------------------------------------
*/

/*-------------------------------------------------------------*/
#include "../../netwox.h"

/*-------------------------------------------------------------*/
/* Information exchanged between the manager and one download thread.
   The block is allocated and initialized by the manager before the
   thread is started, and freed by the manager after the thread has
   finished executing. */
typedef struct {
  /* identifier chosen by the manager; used in log output and in the
     name of the temporary download file */
  netwib_uint32 threadid;
  /* HTTP client context passed to the download function */
  netwox_httpclictx *pctx;
  /* private copy of the url to download */
  netwib_buf url;
  /* copied from the url item and forwarded to the user callback */
  netwib_uint32 recursionlevel;
  /* copied from the url item and forwarded to the user callback */
  netwox_htmltag_elmtattr elmtattr;
  /* name of the local file the thread downloads into (filled in by
     the manager before the thread is started) */
  netwib_buf filename;
  /* passed by address to the download function, then forwarded to
     the user callback */
  netwox_httphdr_statuscode statuscode;
} netwox_webspidermt_info;

/* Allocate and initialize the information block handed to a download
   thread.
     threadid, pctx, recursionlevel, elmtattr : stored as given
     purl : url to download; it is copied into a buffer owned by the block
     ppinfo : on success, receives the newly allocated block
   The filename buffer is initialized empty and statuscode is left unset.
   On failure, everything allocated here is released again, *ppinfo is
   left untouched and the error is returned. */
static netwib_err netwox_webspidermt_info_init(netwib_uint32 threadid,
                                               netwox_httpclictx *pctx,
                                               netwib_constbuf *purl,
                                               netwib_uint32 recursionlevel,
                                              netwox_htmltag_elmtattr elmtattr,
                                              netwox_webspidermt_info **ppinfo)
{
  netwox_webspidermt_info *pinfo;
  netwib_err ret;

  netwib_er(netwib_ptr_malloc(sizeof(netwox_webspidermt_info),
                              (netwib_ptr*)&pinfo));

  /* plain fields first : they cannot fail */
  pinfo->threadid = threadid;
  pinfo->pctx = pctx;
  pinfo->recursionlevel = recursionlevel;
  pinfo->elmtattr = elmtattr;

  ret = netwib_buf_init_mallocdefault(&pinfo->url);
  if (ret != NETWIB_ERR_OK) {
    /* only the block itself exists at this point; the cleanup error is
       ignored because the original failure is what the caller needs */
    netwib_ptr_free((netwib_ptr*)&pinfo);
    return(ret);
  }

  ret = netwib_buf_append_buf(purl, &pinfo->url);
  if (ret == NETWIB_ERR_OK) {
    ret = netwib_buf_init_mallocdefault(&pinfo->filename);
  }
  if (ret != NETWIB_ERR_OK) {
    /* url buffer and block exist, filename buffer does not */
    netwib_buf_close(&pinfo->url);
    netwib_ptr_free((netwib_ptr*)&pinfo);
    return(ret);
  }

  /* publish the block only once it is fully built */
  *ppinfo = pinfo;
  return(NETWIB_ERR_OK);
}

/* Release an information block created by netwox_webspidermt_info_init.
   Both buffers and the block itself are released even when one of the
   steps fails; the first error encountered is returned.
   *ppinfo is set to NULL so the caller is not left with a dangling
   pointer. */
static netwib_err netwox_webspidermt_info_close(netwox_webspidermt_info **ppinfo)
{
  netwox_webspidermt_info *pinfo;
  netwib_err ret, firsterr;

  pinfo = *ppinfo;
  firsterr = NETWIB_ERR_OK;

  ret = netwib_buf_close(&pinfo->url);
  if (firsterr == NETWIB_ERR_OK) firsterr = ret;

  ret = netwib_buf_close(&pinfo->filename);
  if (firsterr == NETWIB_ERR_OK) firsterr = ret;

  ret = netwib_ptr_free((netwib_ptr*)&pinfo);
  if (firsterr == NETWIB_ERR_OK) firsterr = ret;

  /* the block is gone whatever happened above */
  *ppinfo = NULL;

  return(firsterr);
}

/*-------------------------------------------------------------*/
/* Function executed by each download thread.
     infosin : the netwox_webspidermt_info prepared by the manager
     pinfosout : receives the same block, handed back to the manager
   The url stored in the block is downloaded to the file named in the
   block; the address of the statuscode field is passed to the download
   function.
   The output pointer is assigned before the download so it is never
   left unset when the download fails.
   Returns the error of the download. */
static netwib_err netwox_webspidermt_run_mt_thread(netwib_ptr infosin,
                                                   netwib_ptr *pinfosout)
{
  netwox_webspidermt_info *pinfo = infosin;

  /* hand the block back first : this cannot fail */
  *pinfosout = pinfo;

  /* download url */
  netwib_er(netwox_url_download_file(pinfo->pctx, &pinfo->url,
                                     &pinfo->filename, &pinfo->statuscode));

  return(NETWIB_ERR_OK);
}

/*-------------------------------------------------------------*/
/* Manager for multi-thread mode.
   Loops until there is nothing left to do :
    - while fewer than pwebspider->maxthreads threads are running, takes
      the next item of pwebspider->plisturltodownload, starts a thread
      downloading it to "<localrootdir>/tmp/thread<id>", and removes the
      item from the ring;
    - waits for a thread to finish, then calls pfunc (from this manager
      function, not from the download thread) on the downloaded file
      and releases the thread's information block.
   Returns the first error reported by a thread, by pfunc or by the
   release of an information block; NETWIB_ERR_OK when the wait reports
   NETWIB_ERR_DATAEND. */
static netwib_err netwox_webspidermt_run_mt(netwox_webspider *pwebspider,
                                            netwox_webspidermt_pf pfunc)
{
  netwib_ring *pthreadlist;
  netwib_thread *pthread;
  netwib_ring_index *pringindex;
  netwox_webspidermt_info *pinfo;
  netwox_webspiderurl_item *pitem;
  netwib_uint32 count, threadid, numdots;
  netwib_bool event, newthreadlaunched;
  netwox_scale scale;
  netwox_fraction fraction;
  netwib_err ret=NETWIB_ERR_OK, ret2;

  netwib_er(netwib_threadlist_init(&pthreadlist));
  netwib_er(netwib_ring_index_init(pwebspider->plisturltodownload,
                                   &pringindex));
  netwib_er(netwox_scale_init(0, &scale));
  netwib_er(netwox_fraction_init(&fraction));

  threadid = 1;
  numdots = 0;
  while(NETWIB_TRUE) {
    /* eventually launch a new thread */
    newthreadlaunched = NETWIB_FALSE;
    netwib_er(netwib_ring_ctl_get_count(pthreadlist, &count));
    if (count < pwebspider->maxthreads) {
      ret = netwib_ring_index_next(pringindex, (netwib_ptr*)&pitem);
      if (ret == NETWIB_ERR_DATAEND) {
        /* the index reached the end : rewind and try once more
           (presumably to pick up items added to the ring since the last
           pass - TODO confirm) */
        netwib_er(netwib_ring_index_ctl_set_rewind(pringindex));
        ret = netwib_ring_index_next(pringindex, (netwib_ptr*)&pitem);
      }
      if (ret == NETWIB_ERR_OK) {
        /* create a new thread */
        netwib_er(netwox_webspidermt_info_init(threadid,
                                               &pwebspider->httpclictx,
                                               &pitem->url,
                                               pitem->recursionlevel,
                                               pitem->elmtattr, &pinfo));
        /* each thread downloads to its own temporary file */
        netwib_er(netwib_buf_append_buf(&pwebspider->localrootdir,
                                        &pinfo->filename));
        netwib_er(netwib_buf_append_fmt(&pinfo->filename,
                                        "/tmp/thread%{uint32}", threadid));
        if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_NORMAL) {
          netwib_er(netwib_fmt_display("GET(%{uint32}) %{buf}\n", threadid,
                                       &pitem->url));
        } else if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_DOT) {
          netwib_er(netwib_fmt_display("."));
          if (numdots++ == 70) {
            netwib_er(netwib_fmt_display("\n"));
            numdots = 0;
          }
        } else if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_SCALE) {
          netwib_er(netwox_scale_update(&scale, count+1));
        } else if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_FRACTION) {
          /* NOTE(review): count is the number of running threads here,
             whereas netwox_webspidermt_run_mono uses the number of urls
             left in the ring - confirm this is intended */
          netwib_er(netwox_fraction_update(&fraction, pwebspider->totalurl-count, pwebspider->totalurl));
        }
        netwib_er(netwib_thread_init(&netwox_webspidermt_run_mt_thread,
                                     (netwib_ptr)pinfo, &pthread));
        netwib_er(netwib_threadlist_add(pthreadlist, pthread, threadid));
        threadid++;
        newthreadlaunched = NETWIB_TRUE;
        /* remove from ring (the url was copied into pinfo above) */
        netwib_er(netwib_ring_index_this_del(pringindex, NETWIB_TRUE));
      }
    }
    /* check if one thread exited : do not block when a thread was just
       launched, so that another one can be started immediately */
    ret = netwib_threadlist_wait(pthreadlist, newthreadlaunched?NETWIB_TIME_ZERO:NETWIB_TIME_INFINITE, &event, NULL, &ret2, (netwib_ptr*)&pinfo);
    if (ret != NETWIB_ERR_OK) {
      if (ret == NETWIB_ERR_DATANOTAVAIL) continue;
      /* DATAEND is the normal end of the loop */
      if (ret == NETWIB_ERR_DATAEND) ret = NETWIB_ERR_OK;
      break;
    }
    if (event) {
      /* ret2 is the error returned by the thread function */
      if (ret2 != NETWIB_ERR_OK) {
        ret = ret2;
        break;
      }
      /* call user function */
      if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_NORMAL) {
        netwib_er(netwib_fmt_display("ANALYZE(%{uint32}) %{buf}\n",
                                     pinfo->threadid, &pinfo->url));
      } else if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_THREAD) {
        netwib_er(netwib_fmt_display("%{uint32}", pinfo->threadid%10));
        if (numdots++ == 70) {
          netwib_er(netwib_fmt_display("\n"));
          numdots = 0;
        }
      }
      ret = (*pfunc)(pwebspider, &pinfo->url, pinfo->recursionlevel,
                     pinfo->elmtattr, &pinfo->filename, pinfo->statuscode);
      /* the block belongs to the manager : release it whether or not the
         user function succeeded */
      ret2 = netwox_webspidermt_info_close(&pinfo);
      if (ret != NETWIB_ERR_OK) {
        /* the error of the user function takes precedence */
        break;
      }
      if (ret2 != NETWIB_ERR_OK) {
        ret = ret2;
        break;
      }
      if (pwebspider->millisleep) {
        netwib_er(netwib_time_sleep_msec(pwebspider->millisleep));
      }
    }
  }

  netwib_er(netwox_fraction_close(&fraction));
  netwib_er(netwox_scale_close(&scale));
  netwib_er(netwib_ring_index_close(&pringindex));
  netwib_er(netwib_threadlist_close(&pthreadlist));

  /* terminate an unfinished line of dots or thread digits */
  if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_DOT ||
      pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_THREAD) {
    if (numdots != 0) {
      netwib_er(netwib_fmt_display("\n"));
    }
  }

  return(ret);
}

/*-------------------------------------------------------------*/
/* Manager for mono-thread mode.
   For each item of pwebspider->plisturltodownload : logs according to
   pwebspider->loglevel, downloads the url to "<localrootdir>/tmp/get",
   calls pfunc on the downloaded file, optionally sleeps
   pwebspider->millisleep milliseconds, then removes the item from the
   ring.
   The loop stops when the ring has no more items (NETWIB_ERR_OK is
   returned), or on the first error of the download or of pfunc (that
   error is returned, after the local resources have been released). */
static netwib_err netwox_webspidermt_run_mono(netwox_webspider *pwebspider,
                                              netwox_webspidermt_pf pfunc)
{
  netwib_bufpool *pbufpool = pwebspider->httpclictx.pbufpool;
  netwib_ring_index *pringindex;
  netwox_webspiderurl_item *pitem;
  netwox_httphdr_statuscode statuscode;
  netwib_uint32 numdots, count;
  netwox_scale scale;
  netwox_fraction fraction;
  netwib_buf *pfilename;
  netwib_err ret=NETWIB_ERR_OK;

  netwib_er(netwib_ring_index_init(pwebspider->plisturltodownload,
                                   &pringindex));
  netwib_er(netwib_bufpool_buf_init(pbufpool, &pfilename));
  netwib_er(netwox_scale_init(1, &scale));
  netwib_er(netwox_fraction_init(&fraction));

  numdots = 0;
  while(NETWIB_TRUE) {
    /* check for new url */
    ret = netwib_ring_index_next(pringindex, (netwib_ptr*)&pitem);
    if (ret == NETWIB_ERR_DATAEND) {
      /* the index reached the end : rewind and try once more */
      netwib_er(netwib_ring_index_ctl_set_rewind(pringindex));
      ret = netwib_ring_index_next(pringindex, (netwib_ptr*)&pitem);
    }
    if (ret != NETWIB_ERR_OK) {
      /* DATAEND after a rewind is the normal end of the loop */
      if (ret == NETWIB_ERR_DATAEND) ret = NETWIB_ERR_OK;
      break;
    }
    /* log */
    if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_NORMAL) {
      netwib_er(netwib_fmt_display("GET %{buf}\n", &pitem->url));
    } else if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_DOT) {
      netwib_er(netwib_fmt_display("."));
      if (numdots++ == 70) {
        netwib_er(netwib_fmt_display("\n"));
        numdots = 0;
      }
    } else if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_SCALE) {
      netwib_er(netwib_ring_ctl_get_count(pwebspider->plisturltodownload,
                                          &count));
      /* when the ring outgrows the scale, raise the maximum to 125% of
         the current count */
      if (count > scale.maxvalue) scale.maxvalue = 125*count/100;
      netwib_er(netwox_scale_update(&scale, count));
    } else if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_FRACTION) {
      netwib_er(netwib_ring_ctl_get_count(pwebspider->plisturltodownload,
                                          &count));
      netwib_er(netwox_fraction_update(&fraction, pwebspider->totalurl-count, pwebspider->totalurl));
    }
    /* download url */
    netwib__buf_reinit(pfilename);
    netwib_er(netwib_buf_append_buf(&pwebspider->localrootdir, pfilename));
    netwib_er(netwib_buf_append_text("/tmp/get", pfilename));
    ret = netwox_url_download_file(&pwebspider->httpclictx, &pitem->url,
                                   pfilename, &statuscode);
    if (ret != NETWIB_ERR_OK) {
      /* leave through the common cleanup below instead of returning
         directly, so the pooled buffer and the ring index are released */
      break;
    }
    /* call user function */
    if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_THREAD) {
      netwib_er(netwib_fmt_display("1"));
      if (numdots++ == 70) {
        netwib_er(netwib_fmt_display("\n"));
        numdots = 0;
      }
    }
    ret = (*pfunc)(pwebspider, &pitem->url, pitem->recursionlevel,
                   pitem->elmtattr, pfilename, statuscode);
    if (ret != NETWIB_ERR_OK) {
      break;
    }
    if (pwebspider->millisleep) {
      netwib_er(netwib_time_sleep_msec(pwebspider->millisleep));
    }
    /* remove from ring */
    netwib_er(netwib_ring_index_this_del(pringindex, NETWIB_TRUE));
  }

  netwib_er(netwox_fraction_close(&fraction));
  netwib_er(netwox_scale_close(&scale));
  netwib_er(netwib_bufpool_buf_close(pbufpool, &pfilename));
  netwib_er(netwib_ring_index_close(&pringindex));

  /* terminate an unfinished line of dots or thread digits */
  if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_DOT ||
      pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_THREAD) {
    if (numdots != 0) {
      netwib_er(netwib_fmt_display("\n"));
    }
  }

  return(ret);
}

/*-------------------------------------------------------------*/
/* Entry point of the manager : dispatches to the multi-thread manager
   when pwebspider->maxthreads is greater than 1, and to the mono-thread
   manager otherwise. Returns the error of the selected manager. */
netwib_err netwox_webspidermt_run(netwox_webspider *pwebspider,
                                  netwox_webspidermt_pf pfunc)
{
  /* several threads requested */
  if (pwebspider->maxthreads > 1) {
    return(netwox_webspidermt_run_mt(pwebspider, pfunc));
  }

  /* zero or one thread : everything is done in the calling thread */
  return(netwox_webspidermt_run_mono(pwebspider, pfunc));
}
