#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ncurses.h>
#include "minihtml.h"

// True when the two C strings compare equal under strcmp()
#define	strequal(s1, s2)	(strcmp(s1, s2) == 0)

// stuff from linuxtrade.h that shouldn't be here...
// Reverse: when nonzero, minihtml_newline() marks empty lines with
// A_REVERSE instead of A_BOLD
extern int	Reverse;
// Debug: when nonzero, the routines in this file trace to stderr
extern int	Debug;
// Color pair number handed to COLOR_PAIR() when rendering <I> text
#define	CYANonBLACK	6

/*
 * Consume input from fp up to and including the first chunk returned by
 * fgets() that begins with str.  Returns quietly at end of file if no
 * such chunk is found.
 *
 * When Debug is set, every skipped chunk is echoed to stderr with its
 * line ending removed.
 */
void
minihtml_skip_past_line(FILE *fp, char *str)
{
	char	line[BUFSIZ];
	char	*eol;
	int	preflen = strlen(str);

	for (;;)
	{
		if (!fgets(line, sizeof(line), fp))
			return;
		if (strncmp(line, str, preflen) == 0)
			return;
		if (!Debug)
			continue;

		// Cut at the first CR, then at the first LF, for tidy output
		eol = strchr(line, '\r');
		if (eol)
			*eol = 0;
		eol = strchr(line, '\n');
		if (eol)
			*eol = 0;
		fprintf(stderr, "SKIP: <%s>\n", line);
	}
}

/*
 * Push the contents of the cursor's line to the right by half of the
 * distance between the cursor column and the window width, by inserting
 * that many blanks at column 0.  The cursor is moved right by the same
 * amount so it stays at the same place relative to the text.
 */
static void
recenter(WINDOW *win)
{
	int	row, col;
	int	pad;
	int	left;

	getyx(win, row, col);

	if (Debug)
		fprintf(stderr, "recenter line %d at %d\n", row, col);

	pad = (getmaxx(win) - col) / 2;

	// Insert the padding at the start of the line
	wmove(win, row, 0);
	for (left = pad; left > 0; --left)
		winsch(win, ' ');

	wmove(win, row, col + pad);
}

/*
 * Move output to the next line of win.
 *
 * If the cursor is still at column 0, a single blank carrying A_REVERSE
 * (when Reverse is set) or A_BOLD (otherwise) is written first.
 */
void
minihtml_newline(WINDOW *win)
{
	if (getcurx(win) == 0)
	{
		// hack: give the empty line one attributed blank
		attr_t	hilite = A_BOLD;

		if (Reverse)
			hilite = A_REVERSE;
		waddch(win, ' ' | hilite);
	}

	waddch(win, '\n');
}

/*
 * Read the next "word" of an HTML stream.
 *
 * buf  - receives the word, always NUL terminated
 * size - capacity of buf in bytes, including the terminator
 * fp   - input stream
 *
 * Returns the length of the word stored in buf, or 0 at end of input
 * (or when size is too small to hold any word).
 *
 * A word is one of:
 *   " "   a run of whitespace that contained at least one blank or tab
 *         (runs made only of CR/LF are skipped silently)
 *   "<"   the start of a tag; the tag text itself is left in the stream
 *   text  characters up to, but not including, the next whitespace or
 *         '<'.  At most size-1 characters are returned per call; the
 *         rest of an over-long word is returned by subsequent calls.
 *
 * The entities &amp; and &nbsp; (matched case-insensitively) become '&'
 * and ' '.  Any other entity is dropped.  An entity still unterminated
 * when the word ends is discarded.
 */
int
minihtml_getword(char *buf, int size, FILE *fp)
{
	int	c;
	int	len = 0;
	int	state = 0;
	char	*tagp = buf;
	char	*limit;
	int	anyspace = 0;

	// Need room for at least one character plus the terminator
	if (size < 2)
	{
		if (size == 1)
			buf[0] = 0;
		return 0;
	}
	// Last position in buf; reserved for the terminator
	limit = buf + size - 1;

	// eat leading whitespace
eat_leading:
	for (;;)
	{
		c = fgetc(fp);
		if (c == EOF)
		{
			buf[len] = 0;
			return len;
		}
		if (c == ' ' || c == '\t')
			anyspace = 1;
		if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
		{
			ungetc(c, fp);
			break;
		}
	}

	// Report the whitespace run; the pushed-back character is
	// picked up by the next call
	if (anyspace)
	{
		strcpy(buf, " ");
		return 1;
	}

	while ((c = fgetc(fp)) != EOF)
	{
		// Return the HTML tag start character as a 1 char word
		if (len == 0 && c == '<')
		{
			strcpy(buf, "<");
			return 1;
		}

		// Whitespace or tag start character breaks a word
		if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '<')
		{
			if (len == 0)
			{
				// Nothing was collected (only a dropped or
				// unfinished entity).  Forget any pending
				// entity so the following text is not
				// swallowed as part of its name.
				state = 0;
				goto eat_leading;
			}
			ungetc(c, fp);
			buf[len] = 0;
			return len;
		}

		switch (state)
		{
		case 0:
			if (c == '&')
			{
				// Entity name is collected in the unused
				// tail of buf, starting at the current end
				state = '&';
				tagp = buf+len;
				break;
			}

			buf[len++] = c;
			if (len == size - 1)
			{
				// Buffer full: terminate and hand back
				buf[len] = 0;
				return len;
			}
			break;
		case '&':
			if (c == ';')
			{
				*tagp = 0;
				state = 0;
				if (strcmp(buf+len, "AMP") == 0)
					buf[len++] = '&';
				else if (strcmp(buf+len, "NBSP") == 0)
					buf[len++] = ' ';
				if (len == size - 1)
				{
					buf[len] = 0;
					return len;
				}
			}
			else if (tagp < limit)
				*tagp++ = toupper(c);
			// else: entity name too long for buf; drop the excess
			break;
		}
	}

	if (Debug)
		fprintf(stderr, "IP: EOF\n");
	buf[len] = 0;
	return len;
}

/*
 * Extract an integer attribute value from a tag's parameter text.
 *
 * buf    - parameter text of the tag, or NULL if there is none
 * name   - attribute text to search for (e.g. "COLSPAN=")
 * defalt - value to return when no usable number is found
 *
 * Locates the first occurrence of name in buf, then the first '=' at or
 * after it, skips one optional opening quote (" or ') and parses a
 * decimal integer.
 *
 * Returns the parsed value, or defalt when buf is NULL, name or '=' is
 * not found, no digits follow, or the value does not fit in an int.
 */
static int
numparm(char *buf, char *name, int defalt)
{
	char	*p;
	char	*end;
	long	val;

	if (!buf)
		return defalt;

	p = strstr(buf, name);
	if (!p)
		return defalt;

	p = strchr(p, '=');
	if (!p)
		return defalt;
	++p;
	if (*p == '"' || *p == '\'')
		++p;

	// strtol, unlike atoi, lets us tell "no number" apart from 0
	val = strtol(p, &end, 10);
	if (end == p)
		return defalt;
	if (val > INT_MAX || val < INT_MIN)
		return defalt;
	return (int) val;
}

/*
 * Repaint one table column on the cursor's current row using the given
 * color pair.
 *
 * Every cell from cols[col].sx through cols[col].ex (inclusive) is read
 * back, reduced to its low 8 bits, and written again with the window
 * color set to `color`.  Afterwards the cursor is returned to its
 * original column and the window attributes are reset to A_NORMAL.
 */
void
minihtml_colorcol(WINDOW *win, TBLCOL *cols, int col, attr_t color)
{
	int	savex = getcurx(win);
	int	row = getcury(win);
	TBLCOL	*tc = &cols[col];
	int	cx;

	wcolor_set(win, color, NULL);

	cx = tc->sx;
	while (cx <= tc->ex)
	{
		// Keep only the low 8 bits so the cell is redrawn
		// with the window's current color
		chtype	cell = mvwinch(win, row, cx) & 0xff;

		mvwaddch(win, row, cx, cell);
		++cx;
	}

	wmove(win, row, savex);
	wattrset(win, A_NORMAL);
}

/*
 * Render a small subset of HTML read from fp into a curses window.
 *
 * win    - window to draw into; the cursor is moved to (0,0) and the
 *          attributes are reset to A_NORMAL before parsing starts
 * fp     - input stream; read until EOF or until a hook aborts
 * cols   - table column layout (each entry supplies sx and ex, the first
 *          and last screen column), or NULL, in which case table cells
 *          are merely separated by a space
 * numcol - number of entries in cols
 * flags  - MHP_PREFORMAT:  start in preformatted mode
 *          MHP_UL_ITALICS: draw <I> text underlined rather than in the
 *                          CYANonBLACK color pair
 *          MHP_EATBLANK:   do not start a new line for a <TR> when no
 *                          word has been seen since the previous <TR>
 *
 * Optional hooks (any may be NULL):
 *   prewordhook  - called before a word is drawn; may edit the word and
 *                  the current table column.  Its return value replaces
 *                  the word length: 0 skips the word, negative aborts
 *                  the parse.
 *   postwordhook - called after a word has been drawn.
 *   anchorhook   - called for <A> and </A> with the tag's parameter text
 *                  (NULL if the tag had none), the current table column
 *                  and the slash flag.
 *   tablehook    - called for <TABLE> and </TABLE>, but only while cols
 *                  is non-NULL; may replace cols/numcol.  Its return
 *                  value becomes the table count; -1 aborts the parse.
 *
 * Tag names and parameters are upper-cased as they are read.  Tags not
 * listed below are ignored.
 */
void
minihtml_parse(
	WINDOW *win, FILE *fp,
	TBLCOL *cols, int numcol,
	int flags,
	int (*prewordhook)(int *tblcolp, char *wbuf, int wlen),
	void (*postwordhook)(int *tblcolp, char *wbuf, int wlen),
	void (*anchorhook)(char *parmp, int tblcol, int slash),
	int (*tablehook)(int tblcnt, TBLCOL **colsp, int *numcolp, int slash)
	)
{
	int	x, y;
	char	buf[BUFSIZ];	// text of the tag being collected
	char	wbuf[BUFSIZ];	// current word
	int	wlen;
	int	state;		// 0: reading words, '<': inside a tag
	int	c;
	char	*tagp;		// next free position in buf
	char	*parmp;		// tag parameters (after first blank) or NULL
	int	slash;		// tag began with '/'
	int	pre;
	int	center;
	int	underlined;
	int	i;
	int	tblcol;		// current table column; 0 outside any cell
	int	needspace;	// a blank is owed before the next word
	int	blank;		// no word seen since the last <TR>
	int	tblcnt;

	/*
	 * Simple parser for a subset of HTML
	 */
	state = 0;
	pre = flags & MHP_PREFORMAT;
	center = 0;
	underlined = 0;
	slash = 0;
	tagp = buf;
	tblcol = 0;
	needspace = 0;
	blank = 1;
	tblcnt = 0;

	y = 0;
	x = 0;
	wmove(win, y, x);
	wattrset(win, A_NORMAL);

	for (;;)
	{
	    switch (state)
	    {
	    case 0:
		wlen = minihtml_getword(wbuf, BUFSIZ, fp);

		if (Debug)
			fprintf(stderr,
				"IP: <%s> len=%d x=%d col=%d ex=%d ul=%d\n",
				wbuf, wlen, getcurx(win),
				tblcol, cols ? cols[tblcol].ex : 9999,
				underlined);

		if (wlen <= 0)
			return;

		// Only one-character words can be control words
		if (wlen == 1)
			c = wbuf[0];
		else
			c = 0;
		if (c == '<')
		{
			state = '<';
			tagp = buf;
			slash = 0;
			break;
		}
		if (!pre && c == '\r')
			break;
		if (c == ' ')
		{
			needspace = 1;
			break;
		}
		if (c == '\n')
		{
			if (pre)
				minihtml_newline(win);
			break;
		}
		blank = 0;

		// Wrap if the word does not fit, except inside a table cell
		x = getcurx(win);
		if (wlen > (getmaxx(win) - x - 1) && !tblcol)
		{
			minihtml_newline(win);
			x = 0;
		}

		// Call user's hook
		if (prewordhook)
		{
			wlen = (*prewordhook)(&tblcol, wbuf, wlen);
			if (!wlen)
				break;
			if (wlen < 0)
				return;
		}

		if (x && needspace)
			if (!tblcol || getcurx(win) < cols[tblcol].ex)
				waddch(win, ' ');
		needspace = 0;

		// Add the word, clipped to the end of the current column
		for (i = 0; wbuf[i]; ++i)
		{
			if (cols && getcurx(win) > cols[tblcol].ex)
				break;
			waddch(win, wbuf[i]);
		}

		// Call user's hook
		if (postwordhook)
			(*postwordhook)(&tblcol, wbuf, wlen);

		break;
	    case '<':
		c = fgetc(fp);
		if (c == EOF)
			return;
		if (c == '>')
		{
			// Tag complete: split name from parameters
			*tagp = 0;
			parmp = strchr(buf, ' ');
			if (parmp)
				*parmp++ = 0;

			state = 0;

			if (strcmp(buf, "P") == 0 && !slash)
			{
				if (center)
					recenter(win);
				minihtml_newline(win);
				minihtml_newline(win);
			}
			else if (strcmp(buf, "B") == 0)
			{
				wattrset(win,
					slash ? A_NORMAL : A_BOLD);
			}
			else if (strcmp(buf, "STRONG") == 0)
			{
				wattrset(win,
					slash ? A_NORMAL : A_BOLD);
			}
			else if (strcmp(buf, "U") == 0)
			{
				wattrset(win,
					slash ? A_NORMAL : A_UNDERLINE);
				underlined = !slash;
			}
			else if (strcmp(buf, "I") == 0)
			{
				if (flags & MHP_UL_ITALICS)
					wattrset(win,
						slash ? A_NORMAL :
						A_UNDERLINE);
				else
					wattrset(win,
						slash ? A_NORMAL :
						COLOR_PAIR(CYANonBLACK));
			}
			else if (strcmp(buf, "CENTER") == 0)
			{
				if (center && !tblcol)
					recenter(win);
				center = !slash;
			}
			else if (strcmp(buf, "BR") == 0)
			{
				needspace = 1;
			}
			else if (strcmp(buf, "PRE") == 0)
			{
				pre = !slash;
				minihtml_newline(win);
			}
			else if (strcmp(buf, "TABLE") == 0)
			{
				if (!slash)
				    ++tblcnt;
				if (tablehook && cols)
				{
				    tblcnt = (*tablehook)(tblcnt,
						    &cols, &numcol, slash);
				    if (tblcnt == -1)
					    return;
				}
			}
			else if (strcmp(buf, "TR") == 0)
			{
				tblcol = 0;
				if (!slash)
				{
					if (!blank || !(flags&MHP_EATBLANK) )
						minihtml_newline(win);
					blank = 1;
				}
			}
			else if (strequal(buf, "TD") || strequal(buf, "TH"))
			{
				attr_t	attr;
				short	pair;
				int	span;

				if (slash)
				{
					wattrset(win, A_NORMAL);
					break;
				}

				if (!cols)
				{
					needspace = 1;
					break;
				}
				needspace = 0;
				++tblcol;
				if (tblcol >= numcol)
					tblcol = numcol-1;

				// Pad out to the start of the column; when
				// bold is on, the padding is drawn without
				// underline, then attributes are restored
				wattr_get(win, &attr, &pair, NULL);
				if (attr & A_BOLD)
					wattr_off(win, A_UNDERLINE, NULL);
				while (getcurx(win) < cols[tblcol].sx)
					waddch(win, ' ');
				wattrset(win, attr);

				// A cell spanning several columns may draw up
				// to the end of the last one.  Clamp the span
				// to 1..numcol so a zero, negative or huge
				// COLSPAN cannot move tblcol backwards or
				// overflow it.
				span = numparm(parmp, "COLSPAN=", 1);
				if (span > numcol)
					span = numcol;
				if (span < 1)
					span = 1;
				tblcol += span - 1;
				if (tblcol >= numcol)
					tblcol = numcol-1;

				// Closing tags were handled above, so this
				// is always an opening <TH>
				if (strequal(buf, "TH"))
					wattrset(win, A_BOLD|A_UNDERLINE);
			}
			else if (strcmp(buf, "LI") == 0 && !slash)
			{
				minihtml_newline(win);
			}
			else if (strcmp(buf, "H5") == 0
				|| strcmp(buf, "H4") == 0
				|| strcmp(buf, "H3") == 0
				|| strcmp(buf, "H2") == 0
				|| strcmp(buf, "H1") == 0)
			{
				wattrset(win, slash ? A_NORMAL : A_BOLD);
				minihtml_newline(win);
			}
			else if (strcmp(buf, "A") == 0)
			{
				if (anchorhook)
					(*anchorhook)(parmp, tblcol, slash);
			}
		}
		else if (tagp == buf && c == '/')
			slash = 1;
		else if (tagp < buf + sizeof(buf) - 1)
		{
			*tagp++ = toupper(c);
		}
		// else: tag longer than buf; excess characters are dropped
		break;
	    }
	}
}
