/*
                                  NETWOX
                             Network toolbox
                Copyright(c) 1999-2006 Laurent Constantin
                                  -----

  Main server    : http://www.laurentconstantin.com/
  Backup servers : http://go.to/laurentconstantin/
                   http://laurentconstantin.est-la.com/
                   http://laurentconstantin.free.fr/
                   http://membres.lycos.fr/lauconstantin/
  [my current email address is on the web servers]

                                  -----
  This file is part of Netwox.

  Netwox is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License
  version 2 as published by the Free Software Foundation.

  Netwox is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details (http://www.gnu.org/).

------------------------------------------------------------------------
*/

/*-------------------------------------------------------------*/
#include "../../netwox.h"

/*-------------------------------------------------------------*/
/* Allocate a netwox_webspiderurl_item and fill it from the parameters.
   Parameters:
     recursionlevel : value stored in the item's recursionlevel field
     elmtattr       : value stored in the item's elmtattr field
     purl           : buffer whose content is appended to the item's own
                      url buffer (the item keeps its own copy)
     ppitem         : receives the address of the allocated item
   Returns NETWIB_ERR_OK on success, otherwise the error reported by the
   failing netwib call. On failure everything allocated here is released
   and *ppitem is set to NULL, so the caller has nothing to clean up. */
netwib_err netwox_webspiderurl_item_create(netwib_uint32 recursionlevel,
                                           netwox_htmltag_elmtattr elmtattr,
                                           netwib_constbuf *purl,
                                           netwib_ptr *ppitem)
{
  netwox_webspiderurl_item *pitem;
  netwib_err ret;

  netwib_er(netwib_ptr_malloc(sizeof(netwox_webspiderurl_item), ppitem));
  pitem = *ppitem;

  pitem->recursionlevel = recursionlevel;
  pitem->elmtattr = elmtattr;

  ret = netwib_buf_init_mallocdefault(&pitem->url);
  if (ret == NETWIB_ERR_OK) {
    ret = netwib_buf_append_buf(purl, &pitem->url);
    if (ret != NETWIB_ERR_OK) {
      /* the buffer was initialized: close it before dropping the item;
         the append error is the one reported to the caller */
      (void)netwib_buf_close(&pitem->url);
    }
  }

  if (ret != NETWIB_ERR_OK) {
    /* do not leak the item, and do not hand back a dangling pointer */
    (void)netwib_ptr_free(ppitem);
    *ppitem = NULL;
  }

  return(ret);
}

/*-------------------------------------------------------------*/
/* Release an item: close its url buffer, then free the item itself.
   pitem is treated as a netwox_webspiderurl_item. Returns the error of
   the first failing step, or NETWIB_ERR_OK. */
netwib_err netwox_webspiderurl_item_erase(netwib_ptr pitem)
{
  netwox_webspiderurl_item *pentry = (netwox_webspiderurl_item *)pitem;

  /* the buffer lives inside the item, so it has to be closed first */
  netwib_er(netwib_buf_close(&pentry->url));

  return(netwib_ptr_free(&pitem));
}

/*-------------------------------------------------------------*/
/* Register an url found by the spider, unless it is filtered out.
   Parameters:
     pwebspider     : spider state (options, ring of urls to download,
                      hash of every known url, counters)
     recursionlevel : level at which the url was found
     elmtattr       : html element/attribute the url comes from; it
                      determines the kind of content (html, image,
                      multimedia, code)
     purl           : the url; anything from the first '#' is ignored
   The url is silently skipped (NETWIB_ERR_OK is returned) when:
     - its kind of content is not wanted (getimage, getmultimedia,
       getcode are false) or is not one of the four known kinds,
     - it is too deep with regard to maxrecursion,
     - it is already in plistallurl,
     - an ACL is set and does not match.
   Otherwise an item is appended to plisturltodownload, the url is
   stored in plistallurl and totalurl is incremented.
   Returns NETWIB_ERR_OK, or the error of the failing call. */
netwib_err netwox_webspiderurl_add(netwox_webspider *pwebspider,
                                   netwib_uint32 recursionlevel,
                                   netwox_htmltag_elmtattr elmtattr,
                                   netwib_constbuf *purl)
{
  netwox_htmltag_elmtattr_uricontain uricontain;
  netwib_ptr pitem;
  netwib_buf url;
  netwib_data data, pc;
  netwib_uint32 datasize;
  netwib_bool match;
  netwib_err ret;

  /* check type */
  netwib_er(netwox_htmltag_uricontain_init_elmtattr(elmtattr, &uricontain));
  switch(uricontain) {
  case NETWOX_HTMLTAG_ELMTATTR_URICONTAIN_HTML :
    break;
  case NETWOX_HTMLTAG_ELMTATTR_URICONTAIN_IMAGE :
    if (!pwebspider->getimage) {
      return(NETWIB_ERR_OK);
    }
    break;
  case NETWOX_HTMLTAG_ELMTATTR_URICONTAIN_MULTIMEDIA :
    if (!pwebspider->getmultimedia) {
      return(NETWIB_ERR_OK);
    }
    break;
  case NETWOX_HTMLTAG_ELMTATTR_URICONTAIN_CODE :
    if (!pwebspider->getcode) {
      return(NETWIB_ERR_OK);
    }
    break;
  default :
    return(NETWIB_ERR_OK);
  }

  /* check recursion level (no check at all when maxrecursion is 0) :
      - at level maxrecursion+1, only html is refused: other kinds of
        content are still accepted
      - above maxrecursion+1, everything is refused */
  if(pwebspider->maxrecursion != 0) {
    if (recursionlevel == pwebspider->maxrecursion + 1) {
      if (uricontain == NETWOX_HTMLTAG_ELMTATTR_URICONTAIN_HTML) {
        return(NETWIB_ERR_OK);
      }
    } else if (recursionlevel > pwebspider->maxrecursion) {
      return(NETWIB_ERR_OK);
    }
  }

  /* suppress fragment : url is a copy of the buffer descriptor, so
     shortening it does not modify the caller's buffer */
  url = *purl;
  data = netwib__buf_ref_data_ptr(&url);
  datasize = netwib__buf_ref_data_size(&url);
  pc = netwib_c_memchr(data, '#', datasize);
  if (pc != NULL) {
    /* compute the pointer difference first: adding beginoffset to pc
       would form a pointer which may be outside of the buffer */
    url.endoffset = url.beginoffset + (netwib_uint32)(pc - data);
  }

  /* check if already in the hash */
  netwib_er(netwib_hash_contains(pwebspider->plistallurl, &url, &match));
  if (match) {
    return(NETWIB_ERR_OK);
  }

  /* check ACL */
  if (pwebspider->aclurlset) {
    netwib_er(netwox_rules_match(pwebspider->paclurl, 1, &url, &match));
    if (!match) {
      return(NETWIB_ERR_OK);
    }
  }

  /* add in the ring and in the hash */
  if (pwebspider->loglevel == NETWOX_WEBSPIDER_LOGLEVEL_NORMAL) {
    netwib_er(netwib_fmt_display("ADD %{buf}\n", &url));
  }
  netwib_er(netwox_webspiderurl_item_create(recursionlevel, elmtattr, &url,
                                            &pitem));
  ret = netwib_ring_add_last(pwebspider->plisturltodownload, pitem);
  if (ret != NETWIB_ERR_OK) {
    /* the item is still ours (assumes a failed add does not store it
       in the ring - TODO confirm): release it instead of leaking it */
    (void)netwox_webspiderurl_item_erase(pitem);
    return(ret);
  }
  /* the hash value is not a real pointer: it is the kind of content */
  netwib_er(netwib_hash_add(pwebspider->plistallurl, &url,
                            (netwib_constptr)(netwib_uintptr)uricontain,
                            NETWIB_FALSE));
  pwebspider->totalurl++;

  return(NETWIB_ERR_OK);
}

