/*
    BFilter - a smart ad-filtering web proxy
    Copyright (C) 2002-2007  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#ifndef HEURISTICSCORE_H_
#define HEURISTICSCORE_H_

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "FlagOps.h"
#include <stddef.h>

class URI;
class BString;
struct BStringPOD;

// Abstract base for a heuristic "is this an ad?" score.
//
// Subclasses supply the numeric scores through the pure virtual getters.
// The class also groups a number of static helpers that operate on URLs
// and on width/height pairs; their implementations live outside this header.
class HeuristicScore
{
public:
	// Verdict categories, declared in order from "not an ad" to "ad".
	enum Status { NOT_AD, PROBABLY_NOT_AD, PROBABLY_AD, AD };
	
	// The kind of content the caller expects to find behind a URL.
	// Used as the second argument of getUrlStatus().
	enum ExpectedType { EXPECT_ANY, EXPECT_IMAGE, EXPECT_FLASH, EXPECT_HTML };
	
	// Result of comparing two URLs, see getUrlRelationship().
	enum UrlRelationship { URLS_SAME_HOST, URLS_RELATED, URLS_UNRELATED };
	
	// Bit flags describing properties of a URL.  Each plain flag occupies
	// its own bit, so values may be OR-ed together.
	enum UrlFlags {
		HAS_QUERY            = 1 << 0,
		PROBABLY_HAS_QUERY   = 1 << 1, // like: /script.php/more/data
		HAS_EMBEDDED_URL     = 1 << 2, // like: /script?r=http://addr.com/
		UNEXPECTED_EXTENSION = 1 << 3,
		GIF_EXTENSION        = 1 << 4,
		JPEG_EXTENSION       = 1 << 5,
		HTML_EXTENSION       = 1 << 6,
		CGI_BIN              = 1 << 7, // like: /cgi-bin/script
		FRONT_PAGE           = 1 << 8,
		// Composite mask, not a single bit: the union of the five flags
		// listed below.  The *_EXTENSION flags for GIF/JPEG/HTML and
		// FRONT_PAGE are deliberately not part of it.
		LOOKS_LIKE_SCRIPT    = HAS_QUERY|PROBABLY_HAS_QUERY|HAS_EMBEDDED_URL
		                       |UNEXPECTED_EXTENSION|CGI_BIN
	};
	
	// Virtual so that subclasses can be destroyed through a base pointer.
	virtual ~HeuristicScore() {}
	
	// Returns the score as an integer.  Must be provided by subclasses.
	virtual int getNumericScore() const = 0;
	
	// Returns the size-related score as an integer.
	// Must be provided by subclasses.
	virtual int getNumericSizeScore() const = 0;
	
	// Returns the verdict for this score.  A default implementation exists
	// outside this header; subclasses may override it.
	// NOTE(review): presumably derived from getNumericScore() -- confirm
	// against the implementation file.
	virtual Status getStatus() const;
	
	// Returns a combination of UrlFlags for the given URL.
	// 'type' tells what kind of content is expected and defaults
	// to EXPECT_ANY.
	static UrlFlags getUrlStatus(URI const& url, ExpectedType type = EXPECT_ANY);
	
	// Classifies how 'url' relates to 'base' as one of the
	// UrlRelationship values.
	static UrlRelationship getUrlRelationship(URI const& url, URI const& base);
	
	// Tells whether the given width/height pair is a common ad size.
	// NOTE(review): presumably consults m_sCommonSizes -- confirm.
	static bool isCommonAdSize(int width, int height);
	
	// Returns an integer score for the given width/height pair.
	static int getSizeScore(int width, int height);
	
	// Returns an integer modifier computed from the given URL.
	// NOTE(review): the meaning of "hint" is not visible from this
	// header -- see the implementation file.
	static int getHintModifier(URI const& url);
private:
	// Both are defined outside this header.
	struct SizeRecord;
	struct SizeRecordComparator;

	// Compares two domain names, each given as a [begin, end) character
	// range, and returns their relationship.
	static UrlRelationship getDomainRelationship(
		char const* d1_begin, char const* d1_end,
		char const* d2_begin, char const* d2_end);
	
	// Operates on the character range [begin, end).  'begin' is taken by
	// reference and may be modified; 'end' is only read.
	// NOTE(review): presumably advances 'begin' past a leading "www"
	// label -- confirm against the implementation file.
	static void chopLeadingWWW(char const*& begin, char const* end);
	
	// Operates on the character range [begin, end).  'end' is taken by
	// reference and may be modified; 'begin' is only read.
	// NOTE(review): presumably moves 'end' back to drop the top level
	// domain -- confirm against the implementation file.
	static void chopTopLevelDomain(char const* begin, char const*& end);
	
	// Returns the last component of 'path' as a new BString.
	static BString extractLastPathComponent(BString const& path);
	
	// Modifies 'path' in place.
	// NOTE(review): presumably removes its last component -- confirm.
	static void chopLastPathComponent(BString& path);
	
	// Returns the extension of 'path_component' as a new BString.
	static BString extractExtension(BString const& path_component);
	
	// Tells whether 'extension' is consistent with the expected
	// content type.
	static bool extensionMatchesType(
		BString const& extension, ExpectedType type);
	
	// Looks for 'subject' among the 'num_records' entries starting at
	// 'records'.
	// NOTE(review): judging by the name the comparison ignores case;
	// the search strategy is not visible here -- confirm.
	static bool findMatchInsensitive(
		BStringPOD const* records, size_t num_records, BString const& subject);
	
	// Returns a pointer to the SizeRecord for the given width/height pair.
	// NOTE(review): presumably returns a null pointer when there is no
	// match -- confirm against the implementation file.
	static SizeRecord const* findSizeRecord(int width, int height);
	
	// Table of SizeRecord entries; its contents and length are defined
	// outside this header.
	static SizeRecord const m_sCommonSizes[];
};

// Invokes the DEFINE_FLAG_OPS macro for the UrlFlags enum.
// NOTE(review): the macro presumably comes from FlagOps.h and generates
// bitwise operators so that UrlFlags values can be combined without
// casts -- confirm against FlagOps.h.
DEFINE_FLAG_OPS(HeuristicScore::UrlFlags)

#endif
