/*
 * Parser for a serialized tagged collection
 *
 * Copyright (C) 2003  Enrico Zini <enrico@debian.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */

#pragma implementation

#include "TagcollParser.h"
#include "FilterChain.h"
#include "TagcollFilter.h"
#include "Implications.h"
#include "DerivedTags.h"
#include "InputMerger.h"
#include "OpSet.h"
#include "stringf.h"
#include <string>

using namespace std;
using namespace stringf;

namespace TagcollParser
{

// Read one element (an item or tag name) from `in' and store it in `item'.
//
// Blanks (space, tab) before the element are skipped and blanks after it are
// dropped; blanks and separator characters found between two pieces of
// element text are kept as part of the element.
//
// A separator character (':' or ',') terminates the element when it comes
// after blanks that follow the element text, or when it directly follows the
// element text and is itself followed by a blank.  A separator directly
// followed by ordinary text is kept as part of the element.
//
// Return value:
//  ':' or ','        the separator that terminated the element; the blanks
//                    after it are consumed and the first non-blank character
//                    is pushed back on `in'
//  '\n'              the line ended
//  ParserInput::Eof  the input ended
//
// Throws ParserException if the element starts with a separator, if a run of
// more than one separator characters is followed by a blank, or if the line
// ends while a separator other than ':' is pending.
int parseElement(ParserInput& in, string& item) throw (ParserException)
{
	// Scanner phases, in the order they are normally traversed
	enum Phase
	{
		LEADING,	// skipping blanks before the element
		WORD,		// collecting element text
		GAP,		// blanks seen after element text
		DELIM,		// separator(s) seen right after element text
		TRAILING	// skipping blanks after an accepted separator
	};

	item.erase();

	// Blanks or separators read after the element text, held back until we
	// know whether they belong to the element or terminate it
	string pending;
	// Separator candidate seen so far, 0 when there is none
	char delimiter = 0;
	Phase phase = LEADING;

	for (int ch = in.nextChar(); ch != ParserInput::Eof; ch = in.nextChar())
	{
		// A newline ends the element whatever the phase is
		if (ch == '\n')
		{
			if (delimiter != 0 && delimiter != ':')
				throw ParserException("separator character ends the line");
			return '\n';
		}

		const bool blank = (ch == ' ' || ch == '\t');
		const bool separator = (ch == ':' || ch == ',');

		if (phase == LEADING)
		{
			if (blank)
				continue;
			if (separator)
				throw ParserException("element cannot start with a separation character");
			item += static_cast<char>(ch);
			phase = WORD;
		}
		else if (phase == WORD)
		{
			if (blank)
			{
				pending += static_cast<char>(ch);
				phase = GAP;
			}
			else if (separator)
			{
				delimiter = static_cast<char>(ch);
				pending += static_cast<char>(ch);
				phase = DELIM;
			}
			else
			{
				item += static_cast<char>(ch);
			}
		}
		else if (phase == GAP)
		{
			if (blank)
			{
				pending += static_cast<char>(ch);
			}
			else if (separator)
			{
				// The held-back blanks were trailing space: drop them
				delimiter = static_cast<char>(ch);
				phase = TRAILING;
			}
			else
			{
				// The held-back blanks were inside the element: keep them
				item += pending;
				item += static_cast<char>(ch);
				pending = string();
				phase = WORD;
			}
		}
		else if (phase == DELIM)
		{
			if (blank)
			{
				if (pending.size() > 1)
					throw ParserException("item is followed by more than one separator characters");
				phase = TRAILING;
			}
			else if (separator)
			{
				pending += static_cast<char>(ch);
			}
			else
			{
				// The separator(s) were inside the element: keep them and
				// forget the separator candidate
				item += pending;
				item += static_cast<char>(ch);
				delimiter = 0;
				pending = string();
				phase = WORD;
			}
		}
		else
		{
			// TRAILING: the first non-blank character belongs to the next
			// element, so hand it back before reporting the separator
			if (!blank)
			{
				in.pushChar(ch);
				return delimiter;
			}
		}
	}

	return ParserInput::Eof;
}

// item1, item2, item3: tag1, tag2, tag3

//#define TRACE_PARSE
// Parse a serialized tagged collection from `in' and feed it to `consumer'.
//
// The input is read one element at a time with parseElement().  Elements read
// before a `:' separator are collected as items, elements read after it as
// tags.  At every end of line (and at end of input) the collected record, if
// any, is passed to the consumer: consume(items) when no tag was read,
// consume(items, tags) otherwise.
//
// Throws ParserException, carrying the current line number, when
// parseElement() fails, when `:' appears twice in a record, or when a record
// has tags but no items.
void parseTagcoll(ParserInput& in, TagcollConsumer<std::string>& consumer) throw (ParserException)
{
	string item;

	OpSet<string> itemset;
	OpSet<string> tagset;
	int sep;
	enum {ITEMS, TAGS} state = ITEMS;
	int line = 1;
	do
	{
		try {
			sep = parseElement(in, item);
		} catch (ParserException& e) {
			// Add the line number and propagate.  A bare `throw' rethrows
			// the very exception object we just annotated, instead of a
			// copy that would be sliced down to ParserException.
			e.line(line);
			throw;
		}

		// Store the element on the side of the `:' we are currently on
		if (item.size() != 0)
		{
			if (state == ITEMS)
				itemset += item;
			else
				tagset += item;
		}

		switch (sep)
		{
			case '\n':
			case ParserInput::Eof:
				// End of record: hand over what has been collected, if anything
				if (!(itemset.empty() && tagset.empty()))
				{
					if (itemset.empty())
						throw ParserException(line, "no elements before `:' separator");
					if (tagset.empty())
						consumer.consume(itemset);
					else
						consumer.consume(itemset, tagset);
				}
				itemset.clear();
				tagset.clear();
				state = ITEMS;
				// Advance the line counter only once the record has been
				// handled, so that errors raised above report the line the
				// record was on rather than the following one
				if (sep == '\n')
					line++;
				break;
			case ':':
				if (state == TAGS)
					throw ParserException(line, "separator `:' appears twice");
				state = TAGS;
				break;
			default:
				// `,' just separates two elements on the same side
				break;
		}
	} while (sep != ParserInput::Eof);
}

// Parse a tagged collection from `in', expanding it on the fly, and send the
// merged result to `consumer'.
//
// `impls' is parsed as a tagged collection into an ImplicationList and
// `dervs' is parsed by DerivedTagList::parse().  Every record read from `in'
// goes through a filter chain that applies the implications, then the derived
// tags, then the implications again, and is collected by an InputMerger; the
// merger is finally output to `consumer'.
//
// The filters are automatic objects, so they are released on every exit
// path, including when one of the parsing steps throws.
void parseTagcoll(ParserInput& in, ParserInput& impls, ParserInput& dervs, TagcollConsumer<std::string>& consumer) throw (ParserException)
{
	// Prepare the input filter chain
	FilterChain<string> filters;

	// Expand implications
	ImplicationList implications;
	TagcollParser::parseTagcoll(impls, implications);
	// Pack the structure for faster expansions
	implications.pack();
	ApplyImplications implFilter(implications);
	// Hand the chain a base-class pointer; the chain does not own the filter
	TagcollFilter<string>* const ximpl = &implFilter;
	filters.appendFilter(ximpl);

	// Add derived tags
	DerivedTagList derivedTags;
	derivedTags.parse(dervs);
	ApplyDerivedTags dervFilter(derivedTags);
	TagcollFilter<string>* const xderv = &dervFilter;
	filters.appendFilter(xderv);

	// Add further tags implicated by the derived tags
	filters.appendFilter(ximpl);

	// Read and expand the database
	InputMerger<string> merger;
	filters.setConsumer(&merger);
	TagcollParser::parseTagcoll(in, filters);

	// Output the merged collection to consumer
	merger.output(consumer);
}

};

// vim:set ts=4 sw=4:
