#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;

/*
 * Token identifiers passed as the first argument to token().
 *
 * Single-character punctuation tokens are reported with the character code
 * itself as their identifier, so the named identifiers below start at 192 and
 * cannot collide with the printable ASCII range (33..126) used for those.
 * The numbering has gaps; TK_ShiftLeft and TK_ShiftRight are defined here but
 * no rule of the scanner in this file emits them.
 */
#define TK_Dlit 192
#define TK_Slit 193
#define TK_Float 194
#define TK_Id 195
#define TK_NameSep 197
#define TK_Arrow 211
#define TK_PlusPlus 212
#define TK_MinusMinus 213
#define TK_ArrowStar 214
#define TK_DotStar 215
#define TK_ShiftLeft 216
#define TK_ShiftRight 217
#define TK_IntegerDecimal 218
#define TK_IntegerOctal 219
#define TK_IntegerHex 220
#define TK_EqualsEquals 223
#define TK_NotEquals 224
#define TK_AndAnd 225
#define TK_OrOr 226
#define TK_MultAssign 227
#define TK_DivAssign 228
#define TK_PercentAssign 229
#define TK_PlusAssign 230
#define TK_MinusAssign 231
#define TK_AmpAssign 232
#define TK_CaretAssign 233
#define TK_BarAssign 234
#define TK_DotDotDot 240
#define TK_Whitespace 241
#define TK_Comment 242

/* Size in bytes of the input buffer. main() gives up with "TOKEN TOO BIG"
 * when the buffer is completely full and the pending token starts at its
 * first byte, i.e. there is no room left to read more input. */
#define BUFSIZE 4096

/* Reset to 0 by the scanner's init block and inspected by main() after the
 * input is exhausted. Presumably written by the generated scanner code to
 * record the last matched pattern -- TODO confirm against the Ragel output. */
int act;
/* buf is the input buffer filled by main(). tokstart and tokend delimit the
 * current token inside buf; the scanner actions compute the token length as
 * tokend-tokstart+1, so tokend is treated as pointing at the last character. */
char buf[BUFSIZE], *tokstart, *tokend;
/* Prints one token; defined further down in this file. */
void token( int tok, char *data, int len );

/*
 * State object for the scanner machine defined further down in this file.
 * main() creates one instance and calls init(), execute() and finish() on it.
 */
struct Scanner
{
	/* Integer state field; presumably the current state of the generated
	 * machine -- TODO confirm against the Ragel output. */
	int curs;
	/* Ragel directive, expanded by the Ragel preprocessor. Presumably it
	 * declares the init/execute/finish members used by main() -- TODO
	 * confirm against the Ragel output. */
	%% interface;
};

/*
 * Ragel machine definition for the Scanner struct: a tokenizer for C++-like
 * source text. Every pattern in the main scanner block reports its match by
 * calling token() with a token identifier, the start of the match (tokstart)
 * and its length (tokend-tokstart+1).
 */
%% Scanner
{
	# Initialization code: clears the shared scanner globals.
	init { 
		act = 0;
		tokstart = 0;
		tokend = 0;
	}

	# Floating literals.
	# Building blocks for the float rule below: a fraction needs a dot with
	# digits on at least one side, the exponent is e or E with an optional
	# sign, and the suffix is one of f, l, F, L.
	fract_const = digit* '.' digit+ | digit+ '.';
	exponent = [eE] [+\-]? digit+;
	float_suffix = [flFL];

	# Scanner block. Per Ragel scanner semantics the longest match wins and
	# ties go to the pattern listed first, so the order of the rules below
	# matters.
	main := |*

	# Single and double literals.
	# Optional L prefix, then quoted text that may contain backslash escapes
	# but no raw newline.
	( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) { token( TK_Slit, tokstart, tokend-tokstart+1 );};
	( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) { token( TK_Dlit, tokstart, tokend-tokstart+1 );};

	# Identifiers
	( [a-zA-Z_] [a-zA-Z0-9_]* ) { token( TK_Id, tokstart, tokend-tokstart+1 );};

	# Floating literals.
	# Either a fraction with optional exponent, or plain digits with a
	# mandatory exponent; both accept an optional suffix.
	( fract_const exponent? float_suffix? |
		digit+ exponent float_suffix? ) { token( TK_Float, tokstart, tokend-tokstart+1 );};
	
	# Integer decimal. Leading part buffered by float.
	# Accepts up to three suffix characters from u, l, U, L.
	( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) { token( TK_IntegerDecimal, tokstart, tokend-tokstart+1 );};

	# Integer octal. Leading part buffered by float.
	# Note: the digit class is 0-9, so 8 and 9 are accepted here; at most
	# two suffix characters are allowed.
	( '0' [0-9]+ [ulUL]{0,2} ) { token( TK_IntegerOctal, tokstart, tokend-tokstart+1 );};

	# Integer hex. Leading 0 buffered by float.
	# Note: only a lowercase x prefix is matched; at most two suffix
	# characters are allowed.
	( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) { token( TK_IntegerHex, tokstart, tokend-tokstart+1 );};

	# Only buffer the second item, first buffered by symbol.
	# Multi-character operators.
	'::' {token( TK_NameSep, tokstart, tokend-tokstart+1 );};
	'==' {token( TK_EqualsEquals, tokstart, tokend-tokstart+1 );};
	'!=' {token( TK_NotEquals, tokstart, tokend-tokstart+1 );};
	'&&' {token( TK_AndAnd, tokstart, tokend-tokstart+1 );};
	'||' {token( TK_OrOr, tokstart, tokend-tokstart+1 );};
	'*=' {token( TK_MultAssign, tokstart, tokend-tokstart+1 );};
	'/=' {token( TK_DivAssign, tokstart, tokend-tokstart+1 );};
	'%=' {token( TK_PercentAssign, tokstart, tokend-tokstart+1 );};
	'+=' {token( TK_PlusAssign, tokstart, tokend-tokstart+1 );};
	'-=' {token( TK_MinusAssign, tokstart, tokend-tokstart+1 );};
	'&=' {token( TK_AmpAssign, tokstart, tokend-tokstart+1 );};
	'^=' {token( TK_CaretAssign, tokstart, tokend-tokstart+1 );};
	'|=' {token( TK_BarAssign, tokstart, tokend-tokstart+1 );};
	'++' {token( TK_PlusPlus, tokstart, tokend-tokstart+1 );};
	'--' {token( TK_MinusMinus, tokstart, tokend-tokstart+1 );};
	'->' {token( TK_Arrow, tokstart, tokend-tokstart+1 );};
	'->*' {token( TK_ArrowStar, tokstart, tokend-tokstart+1 );};
	'.*' {token( TK_DotStar, tokstart, tokend-tokstart+1 );};

	# Three char compounds, first item already buffered.
	'...' { token( TK_DotDotDot, tokstart, tokend-tokstart+1 );};

	# Single char symbols.
	# Any punctuation character except underscore and the two quote
	# characters; the character code itself is used as the token identifier.
	( punct - [_"'] ) { token( tokstart[0], tokstart, tokend-tokstart+1 );};

	# Comments and whitespace.
	# In both comment rules the any* loop runs at priority 0 and the last
	# character of the terminator at priority 1, so finishing the comment
	# is preferred over continuing the loop. The third rule reports every
	# run of characters outside 33..126 (blanks, control characters and
	# anything beyond printable ASCII) as whitespace.
	'/*' ( any* $0 '*/' @1 ) { token( TK_Comment, tokstart, tokend-tokstart+1 );};
	'//' ( any* $0 '\n' @1 ) { token( TK_Comment, tokstart, tokend-tokstart+1 );};
	( any - 33..126 )+ { token( TK_Whitespace, tokstart, tokend-tokstart+1 );};

	*|;
}

/*
 * Report one token on standard output as "<ID> TEXT" followed by a newline.
 *
 * tok  - token identifier, printed in decimal between angle brackets.
 * data - start of the token text; need not be NUL terminated.
 * len  - number of bytes of data to print; nothing is printed for the text
 *        when len is zero or negative.
 */
void token( int tok, char *data, int len )
{
	cout << "<" << tok << "> ";
	/* Emit the text in one call; guard so a non-positive length prints
	 * nothing rather than being handed to write(). */
	if ( len > 0 )
		cout.write( data, len );
	cout << '\n';
}

/*
 * Driver: reads standard input in chunks into buf, feeds each chunk to the
 * scanner, and keeps the not-yet-finished token at the front of the buffer
 * between reads.
 *
 * Exits with status 1 after printing "PARSE ERROR" when the scanner reports
 * a failure (negative return from execute), or "TOKEN TOO BIG" when a single
 * pending token occupies the whole buffer. Returns 0 otherwise.
 */
int main()
{
	std::ios::sync_with_stdio(false);
	Scanner scanner;
	scanner.init();

	/* Tok start needs to be set up. */
	tokstart = buf;

	/* Number of bytes at the front of buf that belong to the token still
	 * being scanned; new input is appended right after them. */
	int have = 0;

	while ( true ) {
		cin.read( buf+have, BUFSIZE-have );
		/* At most BUFSIZE bytes were requested, so the count fits an int. */
		const int newd = static_cast<int>( cin.gcount() );
		if ( newd == 0 )
			break;

		const int len = have + newd;
		/* Only the freshly read bytes are passed to the scanner. */
		const int rtn = scanner.execute( buf+have, newd );
		if ( rtn < 0 ) {
			/* Machine failed before finding a token. */
			cerr << "PARSE ERROR" << endl;
			std::exit(1);
		}
		else if ( tokstart == buf && len == BUFSIZE ) {
			/* No failure yet, buffer is full. */
			cerr << "TOKEN TOO BIG" << endl;
			std::exit(1);
		}
		else {
			/* No failure yet, room still left in buffer. Shift over data and
			 * read more. The regions may overlap, hence memmove. */
			have = static_cast<int>( len - (tokstart-buf) );
			std::memmove( buf, tokstart, have );
			/* Keep tokend pointing at the same byte after the shift. */
			tokend -= (tokstart-buf);
			tokstart = buf;
		}
	}

	scanner.finish();

	/* HACK: Need to set up tokend since it is not always set. */
	tokend = buf+have-1;
	/* Report whatever is left in the buffer under the catch-all id 9999999,
	 * unless act is 0 or marks a comment or whitespace. */
	if ( act != 0 && act != TK_Comment && act != TK_Whitespace )
		token( 9999999, tokstart, tokend - tokstart + 1 );

	return 0;
}
