/*
 * A mini C-like language scanner.
 */

#include <stdio.h>
#define IDENT_BUFLEN 256

%%{
	machine clang;

	# Append the current character to identBuf. Once IDENT_BUFLEN characters
	# have been stored, further characters are silently dropped, so an
	# over-long token is printed truncated.
	action bufChar {
		if ( identLen < IDENT_BUFLEN ) {
			identBuf[identLen] = fc;
			identLen += 1;
		}
	}

	# Reset the buffer to empty.
	action clearBuf {
		identLen = 0;
	}

	# Token-dumping actions. Each one NUL-terminates identBuf at identLen
	# (bufChar keeps identLen <= IDENT_BUFLEN) and prints the token kind,
	# the current line number and the buffered text.
	action ident {
		identBuf[identLen] = 0;
		printf("ident(%i): %s\n", curLine, identBuf);
	}
	action literal {
		identBuf[identLen] = 0;
		printf("literal(%i): \"%s\"\n", curLine, identBuf);
	}
	action float {
		identBuf[identLen] = 0;
		printf("float(%i): %s\n", curLine, identBuf);
	}
	action int {
		identBuf[identLen] = 0;
		printf("int(%i): %s\n", curLine, identBuf);
	}
	action hex {
		identBuf[identLen] = 0;
		printf("hex(%i): 0x%s\n", curLine, identBuf);
	}
	action symbol {
		identBuf[identLen] = 0;
		printf("symbol(%i): %s\n", curLine, identBuf);
	}

	# Alphanumeric characters or underscore.
	alnumu = alnum | '_';

	# Alphabetic characters or underscore.
	alphau = alpha | '_';

	# Symbols: any single punctuation character except underscore and the
	# two quote characters. Upon entering, clear the buffer. On all
	# transitions, buffer a character. Upon leaving, dump the symbol.
	symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol;

	# Identifier. Upon entering, clear the buffer. On all transitions,
	# buffer a character. Upon leaving, dump the identifier.
	ident = (alphau . alnumu*) >clearBuf $bufChar %ident;

	# Match a single character inside a literal string, or match an escape
	# sequence. The matched character is buffered; for an escape sequence
	# the character following the backslash is the one buffered.
	sliteralChar =
			( extend - ['\\] ) @bufChar |
			( '\\' . extend @bufChar );
	dliteralChar =
			( extend - ["\\] ) @bufChar |
			( '\\' . extend @bufChar );

	# Single-quote and double-quote literals. At the opening quote, clear
	# the buffer. Upon leaving, dump the literal.
	sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal;
	dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal;
	literal = sliteral | dliteral;

	# Whitespace is everything outside the range 0x21..0x7e: standard
	# whitespace, newlines and control codes.
	whitespace = any - 0x21..0x7e;

	# Describe both C-style comments and C++-style comments. The
	# priority bump on the terminator of the comments brings us
	# out of the extend*, which matches everything.
	ccComment = '//' . extend* $0 . '\n' @1;
	cComment = '/*' . extend* $0 . '*/' @1;

	# Match an integer. We don't bother clearing the buffer or filling it:
	# the float machine overlaps with int and it will do both.
	int = digit+ %int;

	# Match a float. Upon entering the machine, clear the buffer. Buffer
	# characters on every transition and dump the float upon leaving.
	float =  ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float;

	# Match a hex number. Upon entering the hex digits (after the '0x'),
	# clear the buffer. Buffer characters on every transition and dump the
	# hex number on leaving transitions.
	hex = '0x' . xdigit+ >clearBuf $bufChar %hex;

	# Or together all the language elements.
	fin = ( ccComment |
		cComment |
		symbol |
		ident |
		literal |
		whitespace |
		int |
		float |
		hex );

	# Star the language elements. It is critical in this type of application
	# that we decrease the priority of the leaving transitions before doing
	# so. This is so that when we see 'aa' we stay in the fin machine to
	# match an ident of length two and do not wrap around to the front to
	# match two idents of length one.
	clang_main = ( fin $1 %0 )*;

	# This machine matches everything, incrementing curLine on each newline.
	newline = ( any | '\n' @{ curLine += 1; } )*;

	# The final fsm is the lexer intersected with the newline machine, which
	# counts lines for us. Since the newline machine accepts everything,
	# the set of strings accepted is governed by the clang_main machine, onto
	# which the newline machine overlays line counting.
	main := clang_main & newline;
}%%

#include <stdio.h>
/* Number of bytes main() requests from stdin per fread() call. */
#define BUFSIZE 2048

/* Input chunk buffer that main() fills and hands to the scanner. */
char buf[BUFSIZE];

/*
 * Ragel directive: replaced by the generated data for the clang machine.
 * NOTE(review): `noerror` presumably omits the error-state constant; main()
 * only reads clang_first_final -- confirm against the Ragel guide.
 */
%% write data noerror;

/*
 * Scan standard input with the clang machine.
 *
 * Reads stdin in BUFSIZE-byte chunks and runs the Ragel-generated scanner
 * over each chunk; the machine's embedded actions print every token as it
 * is matched. After the last chunk, prints "ACCEPT" if the machine ended
 * in a final state and "FAIL" otherwise.
 *
 * Returns 0 once all input has been scanned (for both ACCEPT and FAIL),
 * or 1 if reading stdin failed.
 *
 * The locals cs, p, pe, identBuf, identLen and curLine must keep these
 * names: the machine's actions and the code Ragel emits for the write
 * directives refer to them.
 */
int main(void)
{
	/* One extra byte so the dump actions can always NUL-terminate. */
	char identBuf[IDENT_BUFLEN+1];
	int identLen = 0;
	int curLine = 1;
	int cs;

	%% write init;

	while ( 1 ) {
		/* fread() returns size_t; keep the count unsigned. */
		size_t len = fread( buf, 1, BUFSIZE, stdin );
		char *p = buf, *pe = buf + len;

		%% write exec;

		/* A short read means end of input or a read error. */
		if ( len != BUFSIZE )
			break;
	}

	/* Do not report a verdict for input that was only partially read. */
	if ( ferror( stdin ) ) {
		perror( "stdin" );
		return 1;
	}

	%% write eof;

	if ( cs >= clang_first_final )
		printf("ACCEPT\n");
	else
		printf("FAIL\n");
	return 0;
}

