/*
    BFilter - a smart ad-filtering web proxy
    Copyright (C) 2002-2006  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#ifndef LEXGEN_H_
#define LEXGEN_H_

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <map>
#include <set>
#include "Nfa.h"
#include "NfaConcatenation.h"

// LexGen collects lexer rules (an Nfa plus an action string, optionally with
// a trailer Nfa) and exposes writeLexer(), which takes a header stream and an
// implementation stream.
// NOTE(review): only declarations are visible in this header; remarks about
// out-of-line members are inferred from their signatures and names and should
// be confirmed against the implementation file.
class LexGen
{
public:
	// Per-rule flags. Both flags start out false; each setter switches its
	// flag on and returns *this, so calls can be chained onto the reference
	// returned by addRule().
	class Options
	{
	public:
		Options() : m_isLazy(false), m_isLazyOnBlockEnd(false)  {}
		Options& setLazy() { m_isLazy = true; return *this; }
		Options& setLazyOnBlockEnd() { m_isLazyOnBlockEnd = true; return *this; }
		bool isLazy() const { return m_isLazy; }
		bool isLazyOnBlockEnd() const { return m_isLazyOnBlockEnd; }
	private:
		bool m_isLazy;
		bool m_isLazyOnBlockEnd; // makes no sense if m_isLazy is set
		/*
		LazyOnBlockEnd is more of a hack than a feature. A Lazy rule is always
		preferred over a normal rule, but the LazyOnBlockEnd flag doesn't change
		the rule's priority (because it would have to be done at run time).
		So, in the following example:
		--------------------------------------------------------
		addRule(0, NfaClosure(NfaChar(' ')), "some_code();");
		addRule(0, NfaClosure(NfaChar(' ')), "other_code();").setLazyOnBlockEnd();
		--------------------------------------------------------
		the second rule will never match, be it the end of block or not.
		*/
	};
	// NOTE(review): core_class and subclass are presumably stored in
	// m_core_class and m_subclass -- confirm in the implementation.
	LexGen(const char* core_class, const char* subclass);
	// Adds a rule and returns a reference to an Options object on which the
	// caller can set flags (see the example in Options).
	// NOTE(review): scond presumably means "start condition", and the returned
	// Options is presumably the new rule's m_options -- confirm.
	Options& addRule(int scond, const Nfa& nfa, const std::string& action);
	// Same as above, with an additional trailer Nfa.
	// NOTE(review): trailer is presumably a trailing context that must follow
	// the match -- confirm.
	Options& addRule(int scond, const Nfa& nfa, const Nfa& trailer, const std::string& action);
	// Appends code to m_included_code. code must be a non-null,
	// null-terminated string (std::string::operator+= requires it).
	void includeCode(const char* code) { m_included_code += code; }
	// The two setters below only store the flag; how it affects the output
	// is decided in the implementation.
	void trackStreamPosition(bool val = true) { m_trackStreamPosition = val; }
	void trackLineCol(bool val = true) { m_trackLineCol = val; }
	// NOTE(review): presumably writes the generated lexer's declaration to
	// header and its definition to impl; the meaning of def_class, def_header
	// and subclass_header is not visible here -- confirm.
	void writeLexer(std::ostream& header, std::ostream& impl,
		const char* def_class, const char* def_header, const char* subclass_header);
private:
	// A single rule: id, scond, action text, its Nfa and an optional trailer.
	// NOTE(review): copy constructor, destructor and assignment are declared
	// explicitly, presumably because the rule owns m_pTrailer -- confirm.
	class Rule
	{
	public:
		Rule(int id, int scond, const std::string& action, const Nfa& nfa);
		Rule(int id, int scond, const std::string& action, const Nfa& nfa, const Nfa& trailer);
		Rule(const Rule& other);
		~Rule();
		Rule& operator=(const Rule& other);
		int m_id;
		int m_scond;
		std::string m_action;
		NfaConcatenation m_nfa;
		NfaConcatenation* m_pTrailer; // presumably null when the rule has no trailer -- confirm
		Options m_options;
	};
	
	// Bookkeeping attached to a state: the rule it is associated with, whether
	// it is marked as a trailer state, and three finish flags, all of which the
	// constructor initializes to false.
	// NOTE(review): presumably describes one NFA state of the composition.
	class NStateInfo
	{
	public:
		NStateInfo(int rule, bool trailer) : m_rule(rule), m_isTrailer(trailer),
			m_isQuickFinish(false), m_isSemiFinish(false), m_isLongFinish(false) {}
		int m_rule;
		bool m_isTrailer;
		bool m_isQuickFinish;
		bool m_isSemiFinish;
		bool m_isLongFinish;
		// only one of the finishes may be set
	};
	
	// Built from a list of rules; holds a vector of NfaState and a vector of
	// NStateInfo. getStartPos() and getFinishPos() return the fixed values
	// 0 and 1.
	// NOTE(review): those values are presumably indices into m_states, and
	// m_stateInfos is presumably parallel to m_states -- confirm.
	struct LexerRuleComposition
	{
		LexerRuleComposition(const std::list<Rule*>& rules);
		int getStartPos() const { return 0; }
		int getFinishPos() const { return 1; }
		std::vector<NfaState> m_states;
		std::vector<NStateInfo> m_stateInfos;
	};
	// Grants the nested struct access to LexGen's private members.
	friend struct LexGen::LexerRuleComposition;
	
	// NOTE(review): presumably a state of the generated DFA, identified by the
	// set of NFA state indices in m_stateset -- confirm.
	class DState
	{
	public:
		DState(int scond);
		// Comparisons against a plain state set look only at m_stateset.
		bool operator==(const std::set<int>& stateset) const { return m_stateset==stateset; }
		bool operator!=(const std::set<int>& stateset) const { return m_stateset!=stateset; }
		std::set<int> m_stateset;
		int m_finishFor; // presumably a rule id -- confirm
		bool m_isQuickFinish;
		std::set<int> m_semiFinishFor; // presumably rule ids -- confirm
		bool m_isDeadEnd;
		DState* m_moves[256]; // presumably one target per input byte value -- confirm
		int m_scond;
	};
	
	typedef std::map<int, Rule> rules_by_id_type;
	typedef std::map<int, std::list<Rule*> > rules_by_scond_type;
	// NOTE(review): eClosure() and move() presumably implement the usual
	// epsilon-closure and move operations over an array of NFA states
	// addressed by index -- confirm.
	static std::set<int> eClosure(const NfaState* states, int from);
	static std::set<int> eClosure(const NfaState* states, const std::set<int>& from);
	static std::set<int> move(const NfaState* states, const std::set<int>& from, unsigned char ch);
	// NOTE(review): presumably returns the name of a type able to hold values
	// in [min, max] -- confirm.
	static const char* getTypeForRange(int min, int max);
	void checkForDangerousTrailingContexts(const DState& src_state,
		const DState& dst_state, const LexerRuleComposition& composition);
	// NOTE(review): presumably writes the skeleton lines in skel to out,
	// applying the given substitutions -- confirm.
	void writeSkel(std::ostream& out, const char** skel, const std::map<std::string, std::string>& substitutions);
	
	std::string m_core_class;
	std::string m_subclass;
	std::string m_included_code; // accumulated by includeCode()
	bool m_trackStreamPosition; // set by trackStreamPosition()
	bool m_trackLineCol; // set by trackLineCol()
	rules_by_id_type m_rules_by_id;
	rules_by_scond_type m_rules_by_scond;
	int m_rule_id_generator; // presumably the source of new rule ids -- confirm
};

#endif
