/*
 * tagged collection - Experimental programs to test and study tagged collections
 *
 * Copyright (C) 2003,2004,2005  Enrico Zini
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#define APPNAME PACKAGE
#else
#warning No config.h found: using fallback values
#define APPNAME __FILE__
#define VERSION "unknown"
#endif

#include "CommandlineParser.h"

#include <stdio.h>

#include <stdlib.h>	// getenv

#include <errno.h>

#include <tagcoll/stringf.h>
#include <tagcoll/Exception.h>

#include <tagcoll/CardinalityStore.h>
#include <tagcoll/SmartHierarchy.h>

#include <tagcoll/Consumer.h>
#include <tagcoll/Filter.h>
#include <tagcoll/InputMerger.h>
#include <tagcoll/Implications.h>
#include <tagcoll/Filters.h>
#include <tagcoll/Patches.h>
#include <tagcoll/DerivedTags.h>
#include <tagcoll/ItemGrouper.h>

#include <tagcoll/StdioParserInput.h>
#include <tagcoll/TextFormat.h>
#include <tagcoll/Serializer.h>
#include <tagcoll/Expression.h>

#include <algorithm>

using namespace std;
using namespace Tagcoll;

void printItems(const set<string>& items, const string& prefix = "")
{
	for (set<string>::const_iterator i = items.begin();
			i != items.end(); i++)
	{
		printf("%.*s%.*s\n", PFSTR(prefix), PFSTR(*i));
	}
}

void printNode(HierarchyNode<string, string>* node, string prefix = "")
{
	OpSet<string> items = node->getItems();
	if (!items.empty())
	{
		printItems(items, prefix + ": ");
//	} else {
//		printf("%.*s: no items\n", PFSTR(prefix));
	}
	
	if (prefix.size() > 0 && prefix[prefix.size() - 1] != '/')
		prefix += '/';

	for (HierarchyNode<string, string>::iterator i = node->begin();
			i != node->end(); i++)
	{
		printNode(*i, prefix + (*i)->tag());
	}
}

OpSet<string> getItems(HierarchyNode<string, string>* node)
{
	OpSet<string> items = node->getItems();
	
	for (HierarchyNode<string, string>::iterator i = node->begin();
			i != node->end(); i++)
		items += getItems(*i);

	return items;
}

void readCollection(const string& file, Consumer<string, string>& builder)
	throw (FileException, ParserException)
{
	Converter<string, string> conv;

	if (file == "-")
	{
		StdioParserInput input(stdin, "<stdin>");
		TextFormat<string, string>::parse(conv, conv, input, builder);
	}
	else
	{
		StdioParserInput input(file);
		TextFormat<string, string>::parse(conv, conv, input, builder);
	}
}

PatchList<string, string> readPatches(const string& file)
	throw (FileException, ParserException)
{
	Converter<string, string> conv;

	if (file == "-")
	{
		StdioParserInput input(stdin, "<stdin>");
		return TextFormat<string, string>::parsePatch(conv, conv, input);
	}
	else
	{
		StdioParserInput input(file);
		return TextFormat<string, string>::parsePatch(conv, conv, input);
	}
}

void parseDerivedTags(ParserInput& in, DerivedTags& output)
{
	string tag;
	string expr;

	int c;
	enum {TAG, TAGCOLON, SEXPR, EXPR} state = TAG;
	int line = 1;
	while ((c = in.nextChar()) != ParserInput::Eof)
	{
		if (c == '\n')
		{
			if (tag.size() > 0 && expr.size() > 0)
				output.add(tag, expr);
			else
				fprintf(stderr, "In derived tags file, ignoring incomplete line %d.\n", line);
			tag = string();
			expr = string();
			state = TAG;
			line++;
		} else
			switch (state)
			{
				// Read item
				case TAG:
					switch (c)
					{
						case ':':
							state = TAGCOLON;
							break;
						default:
							tag += c;
							break;
					}
					break;
				// After colon on item
				case TAGCOLON:
					switch (c)
					{
						case ' ':
						case '\t':
							state = SEXPR;
							break;
						case ':':
							tag += c;
							break;
						default:
							tag += ':';
							tag += c;
							state = EXPR;
							break;
					}
					break;
				// Space before tag
				case SEXPR:
					switch (c)
					{
						case ' ':
						case '\t':
							break;
						default:
							expr += c;
							state = EXPR;
							break;
					}
					break;
				// Read tag
				case EXPR:
					expr += c;
					break;
			}
	}
}

void readDerivedTags(const string& file, DerivedTags& derivedTags)
	throw (FileException)
{
	if (file == "-")
	{
		StdioParserInput input(stdin, "<stdin>");
		parseDerivedTags(input, derivedTags);
	}
	else
	{
		StdioParserInput input(file);
		parseDerivedTags(input, derivedTags);
	}
}

class CommandlineParserWithCommand : public CommandlineParser
{
protected:
	map<string, int> command_map;

public:
	CommandlineParserWithCommand(const std::string& argv0,
									const std::string& cmdline_summary,
									const std::string& description) throw ()
		: CommandlineParser(argv0, cmdline_summary, description)
	{
		add("version", 0, "version", "print the program version, then exit");
	}

	void addCommand(const std::string name, int id) throw ()
	{
		command_map[name] = id;
	}

	int parse(int& argc, const char**& argv) throw ()
	{
		if (!CommandlineParser::parse(argc, argv))
		{
			printHelp();
			return false;
		}
		if (get("help").defined())
		{
			printHelp();
			exit(0);
		}
		if (get("version").defined())
		{
			printf("%s ver." VERSION "\n", APPNAME);
			exit(0);
		}
		if (argc == 1)
		{
			printHelp();
			exit(1);
		}

		string command_string = argv[1];
		map<string, int>::const_iterator cmap_i = command_map.find(command_string);
		if (cmap_i == command_map.end())
		{
			fprintf(stderr, "Invalid command: \"%.*s\"\n", PFSTR(command_string));
			printHelp();
			exit(1);
		}

		for (int i = 1; i < argc; i++)
			argv[i] = argv[i + 1];
		--argc;
		
		return cmap_i->second;
	}
};

class CommandlineArgs
{
protected:
	int argc;
	const char** argv;
	int _next;

public:
	CommandlineArgs(int argc, const char* argv[]) throw () : argc(argc), argv(argv), _next(1) {}

	// Return true if there is another argument left in the list
	bool hasNext() const throw () { return argc >= _next + 1; }

	// Return the next argument in the list
	string next() throw ()
	{
		if (hasNext())
		{
			return argv[_next++];
		} else {
			return "-";
		}
	}
};

enum valid_command { COPY, DIFF, RELATED, IMPLICATIONS, HIERARCHY, CLEANHIERARCHY, REVERSE, FINDSPECIALS };

int main(int argc, const char* argv[])
{
	try {
		CommandlineParserWithCommand opts(APPNAME, "[options] [command] [file1|-] [file2|-]",
				"Perform various operations on a tagged collection\n\n"
				"Commands are:\n"
				"  copy         output the collection\n"
				"  reverse      \"reverse\" the collection, outputting one with items\n"
				"               associated to tags\n"
				"  diff         output a tag patch file with the differences between two files\n"
				"  related      print a list of items related to the given one\n"
				"  implications compute a list of tag implications\n"
				"  hierarchy    build a smart hierarchy with the collection data\n"
				"  cleanhierarchy build a cleaned smart hierarchy with the collection data\n"
				"  findspecials generate a smart hierarchy and print, for each toplevel tag,\n"
				"               what are the items that make it toplevel instead of going below\n"
				"               another tag.\n");

		/*
		opts.add("hierarchy", 's', "smart-hierarchy", "build a smart hierarchy");
		opts.add("implications", 'm', "show-implications", "output a list of tag implications");
		opts.add("copy", 'c', "copy", "output the collection");
		opts.add("diff", 'd', "diff", "output a tag patch file with the differences between two files (requires two file arguments)");
		*/

		opts.add("derived", 'e', "derived-tags-from", "use an external list of derived tags", "file");
		opts.add("extimpl", 'i', "implications-from", "use an external list of implications", "file");
		opts.add("rename", 'r', "rename-from", "rename tags using the given mapping list", "file");
		opts.add("patch", 'p', "patch-with", "apply patches from the given tag patch file", "file");

		opts.add("expanded", 'x', "expanded-output", "produce full (and redundant) output data instead of compact");
		opts.add("groupitems", 'g', "group-items", "group items with the same tagset in the output collection");
		opts.add("distance", 'd', "distance", "set the maximum distance to use for the \"related\" command (defaults to 0)", "num");
		opts.add("flatten", 0, "flatten-threshold", "set the number of total items below which a branch is flattened when using the \"hierarchy\" command (defaults to 0, meaning \"don't flatten\")", "num");
		opts.add("filter", 'f', "filter", "filter out the tags with cardinality less than the given value (defaults to not filter; currently only works when building hierarchies)", "num");
		opts.add("untagged-tag", 0, "untagged-tag", "set item name to use for associating untagged items when using the \"reverse\" command.  If not specified, untagged items are not included in the output.", "tag");
		opts.add("remove-unfaceted", 0, "remove-unfaceted", "while parsing, remove all tags with no facet part.");
		opts.add("remove-tags", 0, "remove-tags", "while parsing, remove all tags matching the given tag expression.", "expression");
		
		//opts.add("verbose", 'v', "verbose", "enable verbose output");
		//opts.add("debug", 0, "debug", "enable debugging output (including verbose output)");

		opts.addCommand("copy", (int)COPY);
		opts.addCommand("reverse", (int)REVERSE);
		opts.addCommand("diff", (int)DIFF);
		opts.addCommand("related", (int)RELATED);
		opts.addCommand("implications", (int)IMPLICATIONS);
		opts.addCommand("hierarchy", (int)HIERARCHY);
		opts.addCommand("cleanhierarchy", (int)CLEANHIERARCHY);
		opts.addCommand("findspecials", (int)FINDSPECIALS);

		// Process the commandline
		valid_command cmd = (valid_command)opts.parse(argc, argv);

		CommandlineArgs args(argc, argv);
		
		string item;
		if (cmd == RELATED)
			item = args.next();
		string file1 = args.next();

		//fprintf(stderr, "argc: %d file: %.*s\n", argc, PFSTR(file));

		// Prepare the input filter chain
		FilterChain<string, string> filters;
		Substitute<string, string> substitutions;
		PatchList<string, string> patches;
		Implications<string> implications;
		DerivedTags derivedTags;

		if (opts.get("rename").defined())
		{
			readCollection(opts.get("rename").stringVal(), substitutions.substitutions());
			filters.appendFilter(substitutions);
		}
		if (opts.get("patch").defined())
		{
			patches = readPatches(opts.get("patch").stringVal());
			filters.appendFilter(patches);
		}

		if (opts.get("extimpl").defined())
		{
			readCollection(opts.get("extimpl").stringVal(), implications);
			// Pack the structure for faster expansions
			implications.pack();
		}
		if (opts.get("derived").defined())
			readDerivedTags(opts.get("derived").stringVal(), derivedTags);

		// Intermix implications and derived tags as seems best
		bool compressOutput = (cmd == COPY && !opts.get("expanded").defined());
		bool hasImpl = opts.get("extimpl").defined();
		bool hasDerv = opts.get("derived").defined();

		AddImplied<string, string> addImplied(implications);
		RemoveImplied<string, string> removeImplied(implications);
		AddDerived<string> addDerived(derivedTags);
		RemoveDerived<string> removeDerived(derivedTags);

		if (compressOutput)
		{
			if (hasDerv)
			{
				// Expand implications
				if (hasImpl) filters.appendFilter(addImplied);

				// Remove derived tags computing them using the expanded tag set
				filters.appendFilter(removeDerived);
			}

			// Compress implications
			if (hasImpl) filters.appendFilter(removeImplied);
		} else {
			// Expand implications
			if (hasImpl) filters.appendFilter(addImplied);

			// Add derived tags computing them using the expanded tag set
			if (hasDerv)
			{
				filters.appendFilter(addDerived);

				// Add further tags implicated by the derived tags
				if (hasImpl) filters.appendFilter(addImplied);
			}
		}

		UnfacetedRemover<string> unfacetedRemover("::");
		if (opts.get("remove-unfaceted").defined())
			filters.appendFilter(unfacetedRemover);

		FilterTagsByExpression<string, string> filterByExpression("");
		if (opts.get("remove-tags").defined())
		{
			filterByExpression.setExpression(opts.get("remove-tags").stringVal()); 
			filters.appendFilter(filterByExpression);
		}

		InputMerger<string, string> merger;
		filters.setConsumer(merger);
		readCollection(file1, filters);

		// Perform the correct operation
		switch (cmd)
		{
			case IMPLICATIONS:
			{
				CardinalityStore<string, string> coll;
				merger.output(coll);

				Implications<string> newImpls;

				// Find tag implications
				OpSet<string> allTags = coll.getAllTags();
				for (OpSet<string>::const_iterator t = allTags.begin();
						t != allTags.end(); t++)
				{
					OpSet<string> implied = coll.getImpliedBy(*t);
					if (!implied.empty())
						newImpls.consume(*t, implied);
				}

				newImpls.pack();
				
				Converter<string, string> conv;
				TextFormat<string, string> output(conv, conv, stdout);
				if (opts.get("expanded").defined())
					newImpls.outputFull(output);
				else
					newImpls.output(output);
				break;
			}
			case HIERARCHY:
			{
				int flattenThreshold = 0;
				if (opts.get("flatten").defined())
					flattenThreshold = opts.get("flatten").intVal();

				CardinalityStore<string, string> coll;
				merger.output(coll);

				if (opts.get("filter").defined())
					coll.removeTagsWithCardinalityLessThan(opts.get("filter").intVal());

				// Default operation: build the smart hierarchy
				HierarchyNode<string, string>* root =
					new SmartHierarchyNode<string, string>("_top", coll, flattenThreshold);
				printNode(root, "/");
				break;
			}
			case CLEANHIERARCHY:
			{
				int flattenThreshold = 0;
				if (opts.get("flatten").defined())
					flattenThreshold = opts.get("flatten").intVal();

				CardinalityStore<string, string> coll;
				merger.output(coll);

				if (opts.get("filter").defined())
					coll.removeTagsWithCardinalityLessThan(opts.get("filter").intVal());

				// Default operation: build the smart hierarchy
				HierarchyNode<string, string>* root = new CleanSmartHierarchyNode<string, string>("_top", coll, flattenThreshold);
				printNode(root, "/");
				break;
			}
			case DIFF:
			{
				InputMerger<string, string> merger2;
				filters.setConsumer(merger2);
				readCollection(args.next(), filters);

				PatchList<string, string> newpatches;
				newpatches.addPatch(merger, merger2);

				Converter<string, string> conv;
				TextFormat<string, string>::outputPatch(conv, conv, newpatches, stdout);
				break;
			}
			case RELATED:
			{
				int maxdist = 0;
				if (opts.get("distance").defined())
					maxdist = opts.get("distance").intVal();

				// Split the items on commas
				string splititem;
				set<string> splititems;
				for (string::const_iterator c = item.begin();
						c != item.end(); c++)
					if (*c == ',')
					{
						if (!merger.hasItem(splititem))
						{
							fprintf(stderr, "Item \"%.*s\" does not exist in the collection\n", PFSTR(splititem));
							return 1;
						}
						splititems.insert(splititem);
						splititem = string();
					} else
						splititem += *c;
				if (!merger.hasItem(splititem))
				{
					fprintf(stderr, "Item \"%.*s\" does not exist in the collection\n", PFSTR(splititem));
					return 1;
				}
				splititems.insert(splititem);
				
				// Get the tagset as the intersection of the tagsets of all input items
				set<string>::const_iterator i = splititems.begin();
				OpSet<string> ts = merger.getTags(*i);
				for (++i; i != splititems.end(); i++)
					ts = ts ^ merger.getTags(*i);

				if (ts.empty())
				{
					if (splititems.size() > 1)
						fprintf(stderr, "The items %.*s are unrelated: cannot find a barycenter to start computing relationships from.\n", PFSTR(item));
					else
						fprintf(stderr, "The items %.*s has no tags attached.\n", PFSTR(item));
					return 1;
				}
				
				// Build a full TagCollection
				CardinalityStore<string, string> coll;
				merger.output(coll);

				printItems(coll.getItemsExactMatch(ts));

				if (maxdist)
				{
					// Get the related tagsets
					list< OpSet<string> > rel = coll.getRelatedTagsets(ts, maxdist);

					// Print the output list
					for (list< OpSet<string> >::const_iterator i = rel.begin();
							i != rel.end(); i++)
						printItems(coll.getItemsExactMatch(*i));
				}
				break;
			}
			case REVERSE:
			{
				ItemGrouper<string, string> reverser;
				merger.output(reverser);

				/*
				if (opts.get("untagged-tag").defined())
					reverser.setUntaggedItemName(opts.get("untagged-tag").stringVal());
				 */

				Converter<string, string> conv;
				TextFormat<string, string> writer(conv, conv, stdout);
				if (opts.get("groupitems").defined())
				{
					reverser.outputReversed(writer);
				} else
					// FIXME: we need a filter that does the opposite of ItemGrouper
					reverser.outputReversed(writer);
				break;
			}
			case COPY:
			{
				Converter<string, string> conv;
				TextFormat<string, string> writer(conv, conv, stdout);
				if (opts.get("groupitems").defined())
				{
					ItemGrouper<string, string> grouper;
					merger.output(grouper);
					grouper.output(writer);
				} else
					merger.output(writer);
				break;
			}
			case FINDSPECIALS:
			{
				int flattenThreshold = 0;
				if (opts.get("flatten").defined())
					flattenThreshold = opts.get("flatten").intVal();

				CardinalityStore<string, string> coll;
				merger.output(coll);

				if (opts.get("filter").defined())
					coll.removeTagsWithCardinalityLessThan(opts.get("filter").intVal());

				// Default operation: build the smart hierarchy
				SmartHierarchyNode<string, string> root("_top", coll, flattenThreshold);

				OpSet<string> seen;
				for (HierarchyNode<string, string>::iterator i = root.begin();
						i != root.end(); i++)
				{
					OpSet<string> items = getItems(*i);

					// Find the items in this branch that are not present in
					// any of the previous ones
					OpSet<string> newItems;
					if (!seen.empty())
					{
						for (OpSet<string>::const_iterator j = items.begin();
								j != items.end(); j++)
						{
							OpSet<string> tags = merger.getTags(*j) ^ seen;
							if (tags.empty())
								newItems += *j;
						}

						printf("%.*s: %d items, %d special items:\n",
								PFSTR((*i)->tag()), items.size(), newItems.size());

						int indent = (*i)->tag().size() + 2;
						for (OpSet<string>::const_iterator j = newItems.begin(); j != newItems.end(); j++)
							printf("%*s%.*s\n", indent, "", PFSTR(*j));
					}

					seen += (*i)->tag();
				}

				break;
			}
		}

		return 0;
	}
	catch (Exception& e)
	{
		fprintf(stderr, "%s: %.*s\n", e.type(), PFSTR(e.desc()));
		return 1;
	}
}

// vim:set ts=4 sw=4:
