/*
 * Parses unix mail boxes into headers and bodies.
 */

#include <iostream>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

using namespace std;

#define BUFSIZE 2048

/*
 * A growable buffer for collecting headers.
 *
 * data points at storage obtained through upAllocate(), allocated is the
 * size of that storage in bytes and length is the number of bytes in use.
 * The destructor releases the storage via empty(), so a Buffer is the sole
 * owner of data. Copying is therefore disabled: an implicit member-wise
 * copy would leave two objects freeing the same pointer.
 */
struct Buffer
{
	Buffer() : data(0), allocated(0), length(0) { }
	~Buffer() { empty(); }

	/* Append one character, growing the storage to twice the new length
	 * whenever the current allocation is full. */
	void append( char p ) {
		if ( ++length > allocated )
			upAllocate( length*2 );
		data[length-1] = p;
	}

	/* Forget the contents but keep the storage for reuse. */
	void clear() { length = 0; }

	/* Resize the storage to len bytes. */
	void upAllocate( int len );

	/* Release the storage and reset every field to zero. */
	void empty();

	char *data;
	int allocated;
	int length;

private:
	/* Declared but intentionally never defined: a Buffer owns its storage
	 * and must not be copied. */
	Buffer( const Buffer & );
	Buffer &operator=( const Buffer & );
};



/*
 * User data attached to the MailboxScanner machine. headName collects the
 * characters of the header name currently being matched and headContent
 * collects the text of a header that is going to be printed; both are
 * filled by actions in the machine definition that follows.
 * NOTE(review): fstack[1] presumably reserves a one-entry call stack for
 * the fcall/fret statements used by the header sub-machines -- confirm
 * against the Ragel version this file is built with.
 */
%% MailboxScanner
	struct {
		Buffer headName;
		Buffer headContent;
		fstack[1];
	};
%%

%% MailboxScanner

	# Machine definition: the input is a sequence of messages, each made of
	# a 'From ' line, zero or more headers, a blank line and the body lines
	# (see 'message' and 'main' at the end of this section).

	# Buffer the header names.
	action bufHeadName { headName.append(fc); }

	# Prints a blank line after the end of the headers of each message.
	action blankLine { cout << endl; }
	
	# Helpers we will use in matching the date section of the from line.
	day = /[A-Z][a-z][a-z]/;
	month = /[A-Z][a-z][a-z]/;
	year = /[0-9][0-9][0-9][0-9]/;
	time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' );
	letterZone = /[A-Z][A-Z][A-Z]/;
	numZone = /[+\-][0-9][0-9][0-9][0-9]/;
	zone = letterZone | numZone;
	dayNum = /[0-9 ][0-9]/;

	# These are the different formats of the date minus an obscure
	# type that has a funny string 'remote from xxx' on the end. Taken
	# from c-client in the imap-2000 distribution.
	date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' .
		( year | year . ' ' . zone | zone . ' ' . year );

	# Note the priority assignment on the end of the from line. While we
	# are matching the body of a message we may enter into this machine. We
	# will not leave the body of the previous message until this entire from
	# line is matched. 
	fromLine = 'From ' . (any-'\n')* . ' ' . date . '\n' @(new_msg,1) ;

	# The types of characters that can be used as a header name.
	hchar = print - [ :];

	# Simply eat up an uninteresting header. Return at the first non-ws
	# character following a newline.
	consumeHeader := ( 
			[^\n] | 
			'\n' [ \t] |
			'\n' [^ \t] @{ fhold; fret;}
		)*;

	# Ends a header that is being printed: terminates and prints the
	# collected content, resets the buffer, then holds the current character
	# and returns to the machine that called printHeader.
	action finishHeader {
		headContent.append(0);
		cout << headContent.data << endl;
		headContent.clear();
		fhold;
		fret;
	}

	# Display the contents of a header as it is consumed. Collapses line
	# continuations to a single space. 
	printHeader := ( 
			[^\n] @{headContent.append(fc);} | 
			'\n' ( 
				[ \t]+ (
					'\n' @{fhold;} | 
					[^ \t\n] @{headContent.append(' '); fhold;}
				) |
				[^ \t] @finishHeader )
		)*;

	# Runs on the ':' that ends a header name. From, To and Subject have
	# their name echoed and their contents handed to printHeader; every
	# other header is handed to consumeHeader.
	# NOTE(review): the statements after the if block are textually
	# reachable from the printing branch too; this relies on fcall
	# transferring control immediately -- confirm for the Ragel version
	# in use.
	action onHeader 
	{
		headName.append(0);
		if ( strcmp( headName.data, "From" ) == 0 ||
				strcmp( headName.data, "To" ) == 0 ||
				strcmp( headName.data, "Subject" ) == 0 )
		{
			/* Print the header name, then jump to a machine that will
			 * display the contents. */
			cout << headName.data << ":";
			headName.clear();
			fcall printHeader;
		}

		headName.clear();
		fcall consumeHeader;
	}

	# A header is one or more hchar characters, each buffered into
	# headName, followed by the ':' that triggers onHeader.
	header = hchar+ $bufHeadName ':' @onHeader;

	# A body line: anything up to and including a newline. Its new_msg
	# priority of 0 is lower than the 1 assigned at the end of fromLine.
	messageLine = [^\n]* '\n' @(new_msg,0);

	# When we get to the last newline we are still matching messageLine
	# so on the last newline it will think we are still in the message.
	# We need this because we can't assume that every newline means
	# the end of the current message, whereas at the same time we require
	# that there be a newline before the fromLine of the next message.
	message = ( fromLine .  header* .  '\n' @blankLine .  messageLine* . '\n' );

	# It's important that the priority in the fromLine gets bumped up
	# so that we are able to move to new messages. Otherwise we
	# will always stay in the message body of the first message.
	main := message*;
%%

/*
 * Release the buffer's storage, if any, and return the buffer to its
 * freshly constructed state. Does nothing when no storage is held.
 */
void Buffer::empty()
{
	/* Nothing was ever allocated: leave every field untouched. */
	if ( data == 0 )
		return;

	free( data );
	data = 0;
	allocated = 0;
	length = 0;
}

/*
 * Resize the buffer's storage to len bytes, keeping the existing contents.
 *
 * len is the new capacity in bytes; append() always passes a positive
 * value. If the allocation fails the program reports the condition on
 * stderr and aborts, rather than storing a null pointer that append()
 * would immediately write through.
 */
void Buffer::upAllocate( int len )
{
	/* realloc on a null pointer behaves like malloc, so a single call
	 * covers both the first allocation and later growth. The result goes
	 * into a temporary so the old block is not lost if realloc fails. */
	char *grown = (char*) realloc( data, len );
	if ( grown == 0 ) {
		fprintf( stderr, "mailbox: out of memory\n" );
		abort();
	}

	data = grown;
	allocated = len;
}

MailboxScanner mailbox;
char buf[BUFSIZE];

int main()
{
	mailbox.init();
	while ( 1 ) {
		int len = fread( buf, 1, BUFSIZE, stdin );
		mailbox.execute( buf, len );
		if ( len != BUFSIZE )
			break;
	}
	if ( mailbox.finish() <= 0 )
		cerr << "mailbox: error parsing input" << endl;
	return 0;
}
