/*
 * %CopyrightBegin%
 * 
 * Copyright Ericsson AB 2008-2009. All Rights Reserved.
 * 
 * The contents of this file are subject to the Erlang Public License,
 * Version 1.1, (the "License"); you may not use this file except in
 * compliance with the License. You should have received a copy of the
 * Erlang Public License along with this software. If not, it can be
 * retrieved online at http://www.erlang.org/.
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 * 
 * %CopyrightEnd%
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
#include "erl_process.h"
#include "error.h"
#include "bif.h"
#include "erl_binary.h"
#include "big.h"

#include "erl_unicode.h"

/*
 * State carried between traps while characters_to_list builds its
 * intermediate UTF-8 buffer. A copy of this struct is stored inside a
 * magic binary (see make_magic_bin_for_restart) whose destructor frees
 * 'bytes' if it is still non-NULL.
 */
typedef struct _restart_context {
    byte *bytes;                /* Buffer freed with free_restart(); set to
				   NULL by a trap that takes over the buffer */
    Uint num_processed_bytes;   /* Not used by characters_to_list_trap_1;
				   presumably a read offset for the later
				   trap stages - TODO confirm */
    Uint num_bytes_to_process;  /* Number of bytes written to 'bytes' so far */
    Uint num_resulting_chars;   /* Number of characters encoded so far */
    int state;                  /* One of the UTF8_* status codes; not used
				   by characters_to_list_trap_1 */
} RestartContext;


/*
 * Iteration budgeting: one reduction pays for LOOP_FACTOR iterations of
 * the building loops (see allowed_iterations() and cost_to_proc()).
 * simple_loops_to_common() rescales a count of "simple" iterations by
 * LOOP_FACTOR_SIMPLE / LOOP_FACTOR.
 */
#define LOOP_FACTOR 10
#define LOOP_FACTOR_SIMPLE 50 /* When just counting */

/* Upper bound on iterations per BIF call; see erts_unicode_set_loop_limit() */
static Uint max_loop_limit;

/* Forward declarations of functions defined further down in this file */
static BIF_RETTYPE utf8_to_list(BIF_ALIST_1);
static BIF_RETTYPE finalize_list_to_list(Process *p, 
					 byte *bytes,
					 Eterm rest,
					 Uint num_processed_bytes,
					 Uint num_bytes_to_process, 
					 Uint num_resulting_chars, 
					 int state, int left,
					 Eterm tail);
static int analyze_utf8(byte *source, Uint size, 
			byte **err_pos, Uint *num_chars, int *left);
/* Status codes returned by analyze_utf8() and passed around as 'state' */
#define UTF8_OK 0
#define UTF8_INCOMPLETE 1
#define UTF8_ERROR 2
#define UTF8_ANALYZE_MORE 3

/* Internal BIFs used only as trap targets */
static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3);
static BIF_RETTYPE characters_to_list_trap_1(BIF_ALIST_3);
static BIF_RETTYPE characters_to_list_trap_2(BIF_ALIST_3);

static BIF_RETTYPE characters_to_list_trap_3(BIF_ALIST_3);
static BIF_RETTYPE characters_to_list_trap_4(BIF_ALIST_1);

/* Export entries for the trap BIFs above; filled in by erts_init_unicode() */
static Export characters_to_utf8_trap_exp;
static Export characters_to_list_trap_1_exp;
static Export characters_to_list_trap_2_exp;

static Export characters_to_list_trap_3_exp;
static Export characters_to_list_trap_4_exp;

/* Export entries looked up in module 'unicode' by erts_init_unicode();
   trapped to when the encoding argument is not latin1/unicode/utf8 */
static Export *c_to_b_int_trap_exportp = NULL;
static Export *c_to_l_int_trap_exportp = NULL;

void erts_init_unicode(void)
{
    max_loop_limit = CONTEXT_REDS * LOOP_FACTOR;
    /* Non visual BIFs to trap to. */
    memset(&characters_to_utf8_trap_exp, 0, sizeof(Export));
    characters_to_utf8_trap_exp.address = 
	&characters_to_utf8_trap_exp.code[3];
    characters_to_utf8_trap_exp.code[0] = am_erlang;
    characters_to_utf8_trap_exp.code[1] = 
	am_atom_put("characters_to_utf8_trap",23);
    characters_to_utf8_trap_exp.code[2] = 3;
    characters_to_utf8_trap_exp.code[3] =
	(Eterm) em_apply_bif;
    characters_to_utf8_trap_exp.code[4] = 
	(Eterm) &characters_to_utf8_trap;

    memset(&characters_to_list_trap_1_exp, 0, sizeof(Export));
    characters_to_list_trap_1_exp.address = 
	&characters_to_list_trap_1_exp.code[3];
    characters_to_list_trap_1_exp.code[0] = am_erlang;
    characters_to_list_trap_1_exp.code[1] = 
	am_atom_put("characters_to_list_trap_1",25);
    characters_to_list_trap_1_exp.code[2] = 3;
    characters_to_list_trap_1_exp.code[3] =
	(Eterm) em_apply_bif;
    characters_to_list_trap_1_exp.code[4] = 
	(Eterm) &characters_to_list_trap_1;

    memset(&characters_to_list_trap_2_exp, 0, sizeof(Export));
    characters_to_list_trap_2_exp.address = 
	&characters_to_list_trap_2_exp.code[3];
    characters_to_list_trap_2_exp.code[0] = am_erlang;
    characters_to_list_trap_2_exp.code[1] = 
	am_atom_put("characters_to_list_trap_2",25);
    characters_to_list_trap_2_exp.code[2] = 3;
    characters_to_list_trap_2_exp.code[3] =
	(Eterm) em_apply_bif;
    characters_to_list_trap_2_exp.code[4] = 
	(Eterm) &characters_to_list_trap_2;


    memset(&characters_to_list_trap_3_exp, 0, sizeof(Export));
    characters_to_list_trap_3_exp.address = 
	&characters_to_list_trap_3_exp.code[3];
    characters_to_list_trap_3_exp.code[0] = am_erlang;
    characters_to_list_trap_3_exp.code[1] = 
	am_atom_put("characters_to_list_trap_3",25);
    characters_to_list_trap_3_exp.code[2] = 3;
    characters_to_list_trap_3_exp.code[3] =
	(Eterm) em_apply_bif;
    characters_to_list_trap_3_exp.code[4] = 
	(Eterm) &characters_to_list_trap_3;

    memset(&characters_to_list_trap_4_exp, 0, sizeof(Export));
    characters_to_list_trap_4_exp.address = 
	&characters_to_list_trap_4_exp.code[3];
    characters_to_list_trap_4_exp.code[0] = am_erlang;
    characters_to_list_trap_4_exp.code[1] = 
	am_atom_put("characters_to_list_trap_4",25);
    characters_to_list_trap_4_exp.code[2] = 1;
    characters_to_list_trap_4_exp.code[3] =
	(Eterm) em_apply_bif;
    characters_to_list_trap_4_exp.code[4] = 
	(Eterm) &characters_to_list_trap_4;

    c_to_b_int_trap_exportp =  erts_export_put(am_unicode,am_characters_to_binary_int,2);
    c_to_l_int_trap_exportp =  erts_export_put(am_unicode,am_characters_to_list_int,2);
    

}


/*
 * Allocate 'size' bytes of type ERTS_ALC_T_UNICODE_BUFFER.
 * The memory is released with free_restart().
 */
static ERTS_INLINE void *alloc_restart(size_t size)
{
    void *buffer = erts_alloc(ERTS_ALC_T_UNICODE_BUFFER, size);

    return buffer;
}

/* Release a buffer obtained from alloc_restart(). */
static ERTS_INLINE void free_restart(void *ptr)
{
    erts_free(ERTS_ALC_T_UNICODE_BUFFER, ptr);
}

/*
 * Free the byte buffer owned by 'rc', if any, and clear the pointer so
 * that a second call is a no-op.
 */
static void cleanup_restart_context(RestartContext *rc)
{
    if (rc->bytes == NULL) {
	return;
    }
    free_restart(rc->bytes);
    rc->bytes = NULL;
}

/*
 * Magic binary destructor: cleans up the RestartContext stored as the
 * data of 'bp'.
 */
static void cleanup_restart_context_bin(Binary *bp)
{
    cleanup_restart_context((RestartContext *) ERTS_MAGIC_BIN_DATA(bp));
}

/*
 * Return the RestartContext stored in the magic binary term 'bin'.
 * The term must have been created by make_magic_bin_for_restart();
 * this is only verified by assertions.
 */
static RestartContext *get_rc_from_bin(Eterm bin)
{
    ProcBin *pb;
    Binary *magic;

    ASSERT(ERTS_TERM_IS_MAGIC_BINARY(bin));
    pb = (ProcBin *) binary_val(bin);
    magic = pb->val;

    ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(magic)
	   == cleanup_restart_context_bin);
    return (RestartContext *) ERTS_MAGIC_BIN_DATA(magic);
}

/*
 * Create a magic binary holding a copy of '*rc' and return a term
 * referring to it, built on the heap of process 'p'. The binary's
 * destructor is cleanup_restart_context_bin(), which frees the copied
 * 'bytes' pointer unless it has been cleared.
 */
static Eterm make_magic_bin_for_restart(Process *p, RestartContext *rc)
{
    Binary *magic;
    RestartContext *saved;
    Eterm *hp;

    magic = erts_create_magic_binary(sizeof(RestartContext),
				     cleanup_restart_context_bin);
    saved = ERTS_MAGIC_BIN_DATA(magic);
    memcpy(saved, rc, sizeof(*saved));

    hp = HAlloc(p, PROC_BIN_SIZE);
    return erts_mk_magic_binary_term(&hp, &MSO(p), magic);
}

	
/*
 * Set the maximum number of loop iterations allowed per BIF call.
 * A 'limit' of zero or less restores the default
 * (CONTEXT_REDS * LOOP_FACTOR). Returns the previous limit.
 */
Sint erts_unicode_set_loop_limit(Sint limit) 
{
    Sint previous = (Sint) max_loop_limit;

    max_loop_limit = (limit > 0) ? (Uint) limit
				 : CONTEXT_REDS * LOOP_FACTOR;
    return previous;
}

/*
 * Number of loop iterations process 'p' may perform now: the reductions
 * it has left scaled by LOOP_FACTOR, capped by max_loop_limit.
 */
static ERTS_INLINE int allowed_iterations(Process *p)
{
    int by_reductions = ERTS_BIF_REDS_LEFT(p) * LOOP_FACTOR;
    int by_limit = max_loop_limit;

    return (by_limit < by_reductions) ? by_limit : by_reductions;
}
/*
 * Charge process 'p' for 'cost' loop iterations, i.e. bump its
 * reductions by cost / LOOP_FACTOR. Returns the number of reductions
 * charged.
 */
static ERTS_INLINE int cost_to_proc(Process *p, int cost)
{
    int reductions = cost / LOOP_FACTOR;

    BUMP_REDS(p, reductions);
    return reductions;
}
/*
 * Convert a count of "simple" (counting only) iterations to the
 * equivalent number of ordinary iterations, by dividing with
 * LOOP_FACTOR_SIMPLE / LOOP_FACTOR.
 */
static ERTS_INLINE int simple_loops_to_common(int cost)
{
    return cost / (LOOP_FACTOR_SIMPLE / LOOP_FACTOR);
}

/*
 * Return the size in bytes of 'binary', or -1 if it does not consist
 * of a whole number of bytes (its bit size is non-zero).
 */
static Sint aligned_binary_size(Eterm binary)
{
    unsigned char *bytes;   /* required by the macro, otherwise unused */
    Uint bitoffs;           /* required by the macro, otherwise unused */
    Uint bitsize;

    ERTS_GET_BINARY_BYTES(binary, bytes, bitoffs, bitsize);
    return (bitsize == 0) ? (Sint) binary_size(binary) : (Sint) -1;
}

/*
 * Return the number of bytes needed to encode the latin1 'binary' as
 * UTF-8: one byte for each byte below 0x80, two for every other byte.
 * Returns -1 if the binary's bit size is non-zero.
 */
static Sint latin1_binary_need(Eterm binary)
{
    unsigned char *bytes;
    byte *temp_alloc = NULL;
    Uint bitoffs;
    Uint bitsize;
    Uint size;
    Uint ix;
    Sint need = 0;

    ERTS_GET_BINARY_BYTES(binary, bytes, bitoffs, bitsize);
    if (bitsize != 0) {
	return (Sint) -1;
    }
    if (bitoffs != 0) {
	/* Cannot fail: we've already checked bitsize and that this
	   is a binary */
	bytes = erts_get_aligned_binary_bytes(binary, &temp_alloc);
    }
    size = binary_size(binary);
    for (ix = 0; ix < size; ++ix) {
	need += (bytes[ix] & ((byte) 0x80)) ? 2 : 1;
    }
    erts_free_aligned_binary_bytes(temp_alloc);
    return need;
}

/*
 * Return the total length (1-4) of the UTF-8 sequence introduced by
 * the lead byte 'first', or -1 if 'first' is not a valid lead byte
 * pattern (0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx).
 */
static int utf8_len(byte first) 
{
    if ((first & 0x80) == 0x00) {
	return 1;
    }
    if ((first & 0xE0) == 0xC0) {
	return 2;
    }
    if ((first & 0xF0) == 0xE0) {
	return 3;
    }
    if ((first & 0xF8) == 0xF0) {
	return 4;
    }
    return -1;
}

/*
 * Copy UTF-8 data from 'source' ('size' bytes) to 'target', validating
 * every sequence on the way.
 *
 * leftover/num_leftovers: if 'leftover' is non-NULL and *num_leftovers
 * is non-zero, 'leftover' holds the beginning of a sequence saved by an
 * earlier call; it is completed with bytes from 'source' before the
 * rest of 'source' is copied. If 'leftover' is non-NULL and 'source'
 * ends in the middle of a sequence, the trailing bytes are saved in
 * 'leftover' (and counted in *num_leftovers) instead of being reported
 * as an error.
 *
 * err_pos: set to the first byte of an invalid sequence in 'source'
 * (or to 'source' itself if a completed leftover sequence is invalid).
 * It is not touched when no error is found, so the caller must
 * initialize it to NULL.
 *
 * characters: incremented once for every sequence copied.
 *
 * Returns the number of bytes written to 'target' (0 when a leftover
 * sequence could not be completed or turned out to be invalid).
 */
static int copy_utf8_bin(byte *target, byte *source, Uint size, 
			 byte *leftover, int *num_leftovers, 
			 byte **err_pos, Uint *characters) {
    int copied = 0;
    if (leftover != NULL && *num_leftovers) {
	/* Complete the sequence that was started in an earlier call */
	int need = utf8_len(leftover[0]);
	int from_source = need - (*num_leftovers);
	int c;
	byte *tmp_err_pos = NULL;
	ASSERT(need > 0);
	ASSERT(from_source > 0);
	if (size < from_source) {
	    /* Still not enough bytes, save what there is and give up */
	    memcpy(leftover + (*num_leftovers), source, size);
	    *num_leftovers += size;
	    return 0;
	}
	/* leftover has room for four bytes (see bif) */
	memcpy(leftover + (*num_leftovers),source,from_source);
	/* Validate and copy the now complete sequence by a recursive call
	   without leftover handling */
	c = copy_utf8_bin(target, leftover, need, NULL, NULL, &tmp_err_pos, characters);
	if (tmp_err_pos != 0) {
	    *err_pos = source;
	    return 0;
	}
	copied += c;
	*num_leftovers = 0;
	size -= from_source;
	target += c;
	source += from_source;
    }
    while (size) {
	if (((*source) & ((byte) 0x80)) == 0) {
	    /* One byte (7 bit) character */
	    *(target++) = *(source++);
	    --size; ++copied;
	} else if (((*source) & ((byte) 0xE0)) == 0xC0) {
	    /* Two byte sequence */
	    if (leftover && size < 2) {
		*leftover = *source;
		*num_leftovers = 1;
		break;
	    }
	    if (size < 2 || ((source[1] & ((byte) 0xC0)) != 0x80) ||
		((*source) < 0xC2) /* overlong */) {
		*err_pos = source;
		return copied;
	    }
	    *(target++) = *(source++);
	    *(target++) = *(source++);
	    size -= 2; copied += 2;
	} else if (((*source) & ((byte) 0xF0)) == 0xE0) {
	    /* Three byte sequence */
	    if (leftover && size < 3) {
		memcpy(leftover, source, (int) size);
		*num_leftovers = (int) size;
		break;
	    }
	    if (size < 3 || ((source[1] & ((byte) 0xC0)) != 0x80) ||
		((source[2] & ((byte) 0xC0)) != 0x80) ||
		(((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) {
		*err_pos = source;
		return copied;
	    }
	    /* 0xED followed by 0xA0..0xBF encodes 0xD800..0xDFFF
	       (the surrogate range), which is not allowed */
	    if ((((*source) & ((byte) 0xF)) == 0xD) && 
		((source[1] & 0x20) != 0)) {
		*err_pos = source;
		return copied;
	    }

	    /* 0xEF 0xBF 0xBE/0xBF encodes 0xFFFE/0xFFFF, not allowed */
	    if (((*source) == 0xEF) && (source[1] == 0xBF) &&
		((source[2] == 0xBE) || (source[2] == 0xBF))) {
		*err_pos = source;
		return copied;
	    }
		
	    *(target++) = *(source++);
	    *(target++) = *(source++);
	    *(target++) = *(source++);
	    size -= 3; copied += 3;
	} else if (((*source) & ((byte) 0xF8)) == 0xF0) {
	    /* Four byte sequence */
	    if (leftover && size < 4) {
		memcpy(leftover, source, (int) size);
		*num_leftovers = (int) size;
		break;
	    }
	    if (size < 4 || ((source[1] & ((byte) 0xC0)) != 0x80) ||
		((source[2] & ((byte) 0xC0)) != 0x80) ||
		((source[3] & ((byte) 0xC0)) != 0x80) ||
		(((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) {
		*err_pos = source;
		return copied;
	    }
	    /* Reject anything above 0x10FFFF */
	    if ((((*source) & ((byte)0x7)) > 0x4U) ||
		((((*source) & ((byte)0x7)) == 0x4U) && 
		 ((source[1] & ((byte)0x3F)) > 0xFU))) {
		*err_pos = source;
		return copied;
	    }
	    *(target++) = *(source++);
	    *(target++) = *(source++);
	    *(target++) = *(source++);
	    *(target++) = *(source++);
	    size -= 4; copied +=4;
	} else {
	    /* Not a valid lead byte */
	    *err_pos = source;
	    return copied;
	}
	++(*characters);
    }
    return copied;
}
	    
	    
    
/*
 * Calculate the number of bytes needed to hold 'ioterm' encoded as
 * UTF-8. 'ioterm' may be NIL, a binary, or a (possibly nested and
 * possibly improper) list of small integers, binaries and lists.
 *
 * latin1: non-zero if binaries contain latin1 bytes (so that bytes
 * >= 0x80 need two bytes in the result); otherwise binaries are taken
 * to be UTF-8 already and contribute their own size.
 *
 * costp: receives a measure of the work done: one per integer, one per
 * binary in unicode mode and the needed size per binary in latin1 mode.
 *
 * Returns the needed size, or -1 if the term contains something that
 * is not allowed (a binary that is not a whole number of bytes, or a
 * term of any other type). Integer values are not range checked here.
 */
static Sint utf8_need(Eterm ioterm, int latin1, Uint *costp) 
{
    Eterm *objp;
    Eterm obj;
    DECLARE_ESTACK(stack);
    Sint need = 0;
    Uint cost = 0;

    if (is_nil(ioterm)) {
	DESTROY_ESTACK(stack);
	*costp = 0;
	return need;
    }
    if(is_binary(ioterm)) {
	DESTROY_ESTACK(stack);
	if (latin1) {
	    Sint x = latin1_binary_need(ioterm);
	    *costp = x;
	    return x;
	} else {
	    *costp = 1;
	    return aligned_binary_size(ioterm);
	}
    }
    
    if (!is_list(ioterm)) {
	DESTROY_ESTACK(stack);
	*costp = 0;
	return (Sint) -1;
    }
    /* OK a list, needs to be processed in order, handling each flat list-level
       as they occur, just like io_list_to_binary would */
    ESTACK_PUSH(stack,ioterm);
    while (!ESTACK_ISEMPTY(stack)) {
	ioterm = ESTACK_POP(stack);
	if (is_nil(ioterm)) {
	    /* ignore empty lists */
	    continue;
	}
	if(is_list(ioterm)) {
L_Again:   /* Restart with sublist, old listend was pushed on stack */
	    objp = list_val(ioterm);
	    obj = CAR(objp);
	    for(;;) { /* loop over one flat list of bytes and binaries
		         until sublist or list end is encountered */
		if (is_small(obj)) { /* Always small */
		    for(;;) {
			Uint x = unsigned_val(obj);
			if (x < 0x80)
			    need +=1;
			else if (x < 0x800)
			    need += 2;
			else if (x < 0x10000) 
			    need += 3;
			else 
			    need += 4; 
			/* everything else will give badarg later 
			   in the process, so we don't check */
			++cost;
			ioterm = CDR(objp);
			if (!is_list(ioterm)) {
			    break;
			}
			objp = list_val(ioterm);
			obj = CAR(objp);
			/* Integers that are not bytes leave this inner loop
			   but are picked up again by the is_small() test
			   of the enclosing loop */
			if (!is_byte(obj))
			    break;
		    }
		} else if (is_nil(obj)) {
		    ioterm = CDR(objp);
		    if (!is_list(ioterm)) {
			break;
		    }
		    objp = list_val(ioterm);
		    obj = CAR(objp);
		} else if (is_list(obj)) {
		    /* push rest of list for later processing, start 
		       again with sublist */
		    ESTACK_PUSH(stack,CDR(objp));
		    ioterm = obj;
		    goto L_Again;
		} else if (is_binary(obj)) {
		    Sint x;

		    if (latin1) { 
			x = latin1_binary_need(obj);
			if (x < 0) {
			    DESTROY_ESTACK(stack);
			    *costp = cost;
			    return x;
			} 
			cost += x;
		    } else {
			x = aligned_binary_size(obj);
			if (x < 0) {
			    DESTROY_ESTACK(stack);
			    *costp = cost;
			    return x;
			} 
			++cost;
		    }
		    need += x;
		    ioterm = CDR(objp);
		    if (is_list(ioterm)) {
			/* objp and obj need to be updated if 
			   loop is to continue */
			objp = list_val(ioterm);
			obj = CAR(objp);
		    }
		} else {
		    /* Element of a type that is not allowed */
		    DESTROY_ESTACK(stack);
		    *costp = cost;
		    return ((Sint) -1);
		} 
		if (is_nil(ioterm) || !is_list(ioterm)) {
		    break;
		}
	    } /* for(;;) */
	} /* is_list(ioterm) */
	
	if (!is_list(ioterm) && !is_nil(ioterm)) {
	    /* improper list end, only a binary is accepted as tail */
	    if (is_binary(ioterm)) {
		Sint x; 
		if (latin1) {
		    x = latin1_binary_need(ioterm);
		    if (x < 0) {
			DESTROY_ESTACK(stack);
			*costp = cost;
			return x;
		    } 
		    cost += x;
		} else {
		    x = aligned_binary_size(ioterm);
		    if (x < 0) {
			DESTROY_ESTACK(stack);
			*costp = cost;
			return x;
		    } 
		    ++cost;
		}
		need += x;
	    } else {
		DESTROY_ESTACK(stack);
		*costp = cost;
		return ((Sint) -1);
	    }
	}
    } /* while  not estack empty */
    DESTROY_ESTACK(stack);
    *costp = cost;
    return need;
}
    
    
/*
 * Encode as much of 'ioterm' as the iteration budget allows as UTF-8
 * into 'target', and return the part of the term that was not processed.
 *
 * p:          process on whose heap sub binaries and list cells for the
 *             returned rest term are allocated.
 * ioterm:     NIL, a binary, or a (possibly nested, possibly improper)
 *             list of small integers, binaries and lists.
 * left:       in/out iteration budget. Decremented by one per integer
 *             and by the number of bytes consumed per binary. For UTF-8
 *             binaries it may first be increased so that a split does
 *             not end up in the middle of a sequence.
 * latin1:     non-zero if binaries hold latin1 bytes and integers must
 *             be <= 255; otherwise binaries hold UTF-8.
 * target/pos: output buffer and in/out write index into it. The buffer
 *             is not bounds checked here; the callers size it with
 *             utf8_need().
 * characters: in/out count of characters encoded.
 * err:        set to 0 on entry and to 1 if an error is found.
 * leftover/num_leftovers: incomplete UTF-8 sequence carried over
 *             between binaries, see copy_utf8_bin().
 *
 * Returns NIL when a binary was consumed completely; otherwise a term
 * representing what remains (on error: starting at the faulty data).
 */
static Eterm do_build_utf8(Process *p, Eterm ioterm, int *left, int latin1,
			   byte *target, int *pos, Uint *characters, int *err, 
			   byte *leftover, int *num_leftovers)
{
    int c;
    Eterm *objp;
    Eterm obj;
    DECLARE_ESTACK(stack);

    *err = 0;
    if ((*left) <= 0 || is_nil(ioterm)) {
	DESTROY_ESTACK(stack);
	return ioterm;
    }
    if(is_binary(ioterm)) {
	Uint bitoffs;
	Uint bitsize;
	Uint size;
	Uint i;
	Eterm res_term = NIL;
	unsigned char *bytes;
	byte *temp_alloc = NULL;
	Uint orig_size;
	
	ERTS_GET_BINARY_BYTES(ioterm, bytes, bitoffs, bitsize);
	if (bitsize != 0) {
	    /* Not a whole number of bytes */
	    *err = 1;
	    DESTROY_ESTACK(stack);
	    return ioterm;
	}
	if (bitoffs != 0) {
	    bytes = erts_get_aligned_binary_bytes(ioterm, &temp_alloc);
	    /* The call to erts_get_aligned_binary_bytes cannot fail as 
	       we've already checked bitsize and that this is a binary */
	}

	orig_size = size = binary_size(ioterm);

	/* This is done to avoid splitting binaries in two 
	   and then create an unnecessary rest that eventually gives an error.
	   For cases where errors are not returned this is unnecessary */
	if (!latin1) { 
	    /* Find a valid character boundary */
	    while (size > (*left) && 
		   (((byte) bytes[(*left)]) & ((byte) 0xC0)) == ((byte) 0x80)) {
		++(*left);
	    }
	}

	if (size > (*left)) {
	    Eterm *hp;
	    ErlSubBin *sb;
	    Eterm orig;
	    Uint offset;
	    /* Split the binary in two parts, of which we 
	       only process the first */
	    hp = HAlloc(p, ERL_SUB_BIN_SIZE);
	    sb = (ErlSubBin *) hp;
	    ERTS_GET_REAL_BIN(ioterm, orig, offset, bitoffs, bitsize);
	    sb->thing_word = HEADER_SUB_BIN;
	    sb->size = size - (*left);
	    sb->offs = offset + (*left);
	    sb->orig = orig;
	    sb->bitoffs = bitoffs;
	    sb->bitsize = bitsize;
	    sb->is_writable = 0;
	    res_term = make_binary(sb);
	    size = (*left);
	}

	if (!latin1) {
	    int num;
	    byte *err_pos = NULL;
	    num = copy_utf8_bin(target + (*pos), bytes, 
				size, leftover, num_leftovers,&err_pos,characters);
	    *pos += num;
	    if (err_pos != NULL) {
		int rest_bin_offset;
		int rest_bin_size;
		Eterm *hp;
		ErlSubBin *sb;
		Eterm orig;
		Uint offset;

		*err = 1;
		/* we have no real stack, just build a list of the binaries
		   we have not decoded... */
		DESTROY_ESTACK(stack);

		/* The rest is a sub binary from the faulty byte to the
		   end of the original binary */
		rest_bin_offset = (err_pos - bytes);
		rest_bin_size = orig_size - rest_bin_offset;
		
		hp = HAlloc(p, ERL_SUB_BIN_SIZE);
		sb = (ErlSubBin *) hp;
		ERTS_GET_REAL_BIN(ioterm, orig, offset, bitoffs, bitsize);
		sb->thing_word = HEADER_SUB_BIN;
		sb->size = rest_bin_size;
		sb->offs = offset + rest_bin_offset;
		sb->orig = orig;
		sb->bitoffs = bitoffs;
		sb->bitsize = bitsize;
		sb->is_writable = 0;
		res_term = make_binary(sb);
		erts_free_aligned_binary_bytes(temp_alloc);
		return res_term;
	    }
	} else {
	    /* latin1: bytes >= 0x80 become two byte sequences */
	    i = 0;
	    while(i < size) {
		if (bytes[i] < 0x80) {
		    target[(*pos)++] = bytes[i++];
		} else {
		    target[(*pos)++] = ((bytes[i] >> 6) | ((byte) 0xC0));
		    target[(*pos)++] = ((bytes[i] & 0x3F) | ((byte) 0x80));
		    ++i;
		}
		++(*characters);
	    }
	}
	*left -= size;
	DESTROY_ESTACK(stack);
	erts_free_aligned_binary_bytes(temp_alloc);
	return res_term;
    }
	
    if (!is_list(ioterm)) {
	*err = 1;
	goto done;
    }

    /* OK a list, needs to be processed in order, handling each flat list-level
       as they occur, just like io_list_to_binary would */
    ESTACK_PUSH(stack,ioterm);
    while (!ESTACK_ISEMPTY(stack) && (*left)) {
	ioterm = ESTACK_POP(stack);
	if (is_nil(ioterm)) {
	    /* ignore empty lists */
	    continue;
	}
	if(is_list(ioterm)) {
L_Again:   /* Restart with sublist, old listend was pushed on stack */
	    objp = list_val(ioterm);
	    obj = CAR(objp);
	    for(;;) { /* loop over one flat list of bytes and binaries
		         until sublist or list end is encountered */
		if (is_small(obj)) { /* Always small in unicode*/
		    if (*num_leftovers) {
			/* Have rest from previous bin and this is an integer, not allowed */
			*err = 1;
			goto done;
		    }
		    for(;;) {
			Uint x = unsigned_val(obj);
			if (latin1 && x > 255) {
			    *err = 1;
			    goto done;
			}
			if (x < 0x80) {
			    target[(*pos)++] = (byte) x;
			}
			else if (x < 0x800) {
			    target[(*pos)++] = (((byte) (x >> 6)) | 
						 ((byte) 0xC0));
			    target[(*pos)++] = (((byte) (x & 0x3F)) | 
						((byte) 0x80));
			} else if (x < 0x10000) {
			    if ((x >= 0xD800 && x <= 0xDFFF) ||
				(x == 0xFFFE) ||
				(x == 0xFFFF)) { /* Invalid unicode range */
				*err = 1;
				goto done;
			    }
			    target[(*pos)++] = (((byte) (x >> 12)) | 
						((byte) 0xE0));
			    target[(*pos)++] = ((((byte) (x >> 6)) & 0x3F)  | 
						((byte) 0x80));
			    target[(*pos)++] = (((byte) (x & 0x3F)) | 
						((byte) 0x80));
			} else if (x < 0x110000) { /* Standard imposed max */
			    target[(*pos)++] = (((byte) (x >> 18)) | 
						((byte) 0xF0));
			    target[(*pos)++] = ((((byte) (x >> 12)) & 0x3F)  | 
						((byte) 0x80));
			    target[(*pos)++] = ((((byte) (x >> 6)) & 0x3F)  | 
						((byte) 0x80));
			    target[(*pos)++] = (((byte) (x & 0x3F)) | 
						((byte) 0x80));
			} else {
				*err = 1;
				goto done;
			}
			++(*characters);
			--(*left);
			ioterm = CDR(objp);
			if (!is_list(ioterm) || !(*left)) {
			    break;
			}
			objp = list_val(ioterm);
			obj = CAR(objp);
			if (!is_small(obj))
			    break;
		    }
		} else if (is_nil(obj)) {
		    ioterm = CDR(objp);
		    if (!is_list(ioterm)) {
			break;
		    }
		    objp = list_val(ioterm);
		    obj = CAR(objp);
		} else if (is_list(obj)) {
		    /* push rest of list for later processing, start 
		       again with sublist */
		    ESTACK_PUSH(stack,CDR(objp));
		    ioterm = obj;
		    goto L_Again;
		} else if (is_binary(obj)) {
		    Eterm rest_term;
		    rest_term = do_build_utf8(p,obj,left,latin1,target,pos, characters, err, 
					      leftover, num_leftovers);
		    if ((*err) != 0) {
			/* Put the undecoded rest of the binary in front of
			   the rest of this list */
			Eterm *hp;
			hp = HAlloc(p, 2);
			obj = CDR(objp);
			ioterm = CONS(hp, rest_term, obj);
			//(*left) = 0;
			goto done;
		    }
		    if (rest_term != NIL) {
			/* The binary was split, the budget is used up */
			Eterm *hp;
			hp = HAlloc(p, 2);
			obj = CDR(objp);
			ioterm = CONS(hp, rest_term, obj);
			(*left) = 0;
			break;
		    }
		    ioterm = CDR(objp);
		    if (is_list(ioterm)) {
			/* objp and obj need to be updated if 
			   loop is to continue */
			objp = list_val(ioterm);
			obj = CAR(objp);
		    }
		} else {
		    *err = 1;
		    goto done;
		} 
		if (!(*left) || is_nil(ioterm) || !is_list(ioterm)) {
		    break;
		}
	    } /* for(;;) */
	} /* is_list(ioterm) */

	if ((*left) && !is_list(ioterm) && !is_nil(ioterm)) {
	    /* improper list end, only a binary is accepted as tail */
	    if (is_binary(ioterm)) {
		ioterm = do_build_utf8(p,ioterm,left,latin1,target,pos,characters,err,leftover,num_leftovers);
		if ((*err) != 0) {
		    goto done;
		}
	    } else {
		*err = 1;
		goto done;
	    }
	}
    } /* while left and not estack empty */
 done:
    /* Whatever is still on the stack has not been processed; cons it
       onto the rest term that is returned */
    c = ESTACK_COUNT(stack);
    if (c > 0) {
	Eterm *hp = HAlloc(p,2*c);
	while(!ESTACK_ISEMPTY(stack)) {
	    Eterm st = ESTACK_POP(stack);
	    ioterm = CONS(hp, ioterm, st);
	    hp += 2;
	}
    }
    DESTROY_ESTACK(stack);
    return ioterm;

}

/*
 * Decide whether the 'size' bytes at 'source', left over at the end of
 * the input, can be the beginning of a multi byte UTF-8 sequence.
 * Returns 0 if the lead byte announces a longer sequence than 'size'
 * and all bytes after the lead byte are continuation bytes (10xxxxxx);
 * a two byte lead always gives 0. Returns -1 otherwise.
 */
static int check_leftovers(byte *source, int size) 
{
    byte lead = source[0];

    if ((lead & 0xE0) == 0xC0) {
	return 0;
    }
    if ((lead & 0xF0) == 0xE0) {
	if (size < 2) {
	    return 0;
	}
	if (size < 3 && (source[1] & 0xC0) == 0x80) {
	    return 0;
	}
	return -1;
    }
    if ((lead & 0xF8) == 0xF0) {
	if (size < 2) {
	    return 0;
	}
	if (size < 3 && (source[1] & 0xC0) == 0x80) {
	    return 0;
	}
	if (size < 4 &&
	    (source[1] & 0xC0) == 0x80 &&
	    (source[2] & 0xC0) == 0x80) {
	    return 0;
	}
	return -1;
    }
    return -1;
}
	
	 

/*
 * Build the return value (or set up a trap) for characters_to_binary
 * after a call to do_build_utf8().
 *
 * The size of 'bin' is set to 'pos'. Then one of the following happens:
 *  - err != 0:   {error, Bin, Rest} is returned; pending leftover bytes
 *                are put in front of the rest term as a binary.
 *  - nothing left to process but leftover bytes exist:
 *                {incomplete, Bin, Leftover} if the leftover bytes can
 *                start a valid sequence, otherwise {error, Bin, Leftover}.
 *  - rest_term is not NIL: all reductions are consumed and the BIF traps
 *                to characters_to_utf8_trap with the rest (leftover
 *                bytes first).
 *  - otherwise:  Bin is returned.
 */
static BIF_RETTYPE build_utf8_return(Process *p,Eterm bin,int pos,
			       Eterm rest_term,int err,
			       byte *leftover,int num_leftovers,Eterm latin1)
{
    Eterm *hp;
    Eterm ret;
    Eterm leftover_bin;
    Eterm tag;

    binary_size(bin) = pos;

    if (err) {
	if (num_leftovers > 0) {
	    leftover_bin = new_binary(p, leftover, num_leftovers);
	    /* Two cons cells plus the 3-tuple */
	    hp = HAlloc(p, 8);
	    rest_term = CONS(hp, rest_term, NIL);
	    hp += 2;
	    rest_term = CONS(hp, leftover_bin, rest_term);
	    hp += 2;
	} else {
	    hp = HAlloc(p, 4);
	}
	ret = TUPLE3(hp, am_error, bin, rest_term);
	BIF_RET(ret);
    }

    if (rest_term == NIL) {
	if (num_leftovers == 0) {
	    /* Success */
	    BIF_RET(bin);
	}
	leftover_bin = new_binary(p, leftover, num_leftovers);
	tag = (check_leftovers(leftover, num_leftovers) != 0)
	    ? am_error : am_incomplete;
	hp = HAlloc(p, 4);
	ret = TUPLE3(hp, tag, bin, leftover_bin);
	BIF_RET(ret);
    }

    /* More to do, trap */
    if (num_leftovers > 0) {
	Eterm rest_bin = new_binary(p, leftover, num_leftovers);
	hp = HAlloc(p, 2);
	rest_term = CONS(hp, rest_bin, rest_term);
    }
    BUMP_ALL_REDS(p);
    BIF_TRAP3(&characters_to_utf8_trap_exp, p, bin, rest_term, latin1);
}


/*
 * Trap target for characters_to_binary: continue encoding.
 *
 * BIF_ARG_1: the result binary built so far; its current size is the
 *            write position in its data area.
 * BIF_ARG_2: the part of the input that remains to be processed.
 * BIF_ARG_3: the encoding atom of the input (latin1 or other).
 *
 * Processes as much as the iteration budget allows, charges the process
 * for the work done and lets build_utf8_return() return or trap again.
 */
static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3)
{
    byte* bytes;
    Eterm rest_term;
    int left, sleft;
    int pos;
    int err;
    byte leftover[4]; /* used for temp buffer too, 
			 otherwise 3 bytes would have been enough */
    int num_leftovers = 0;
    int latin1 = 0;
    Uint characters = 0;
    
    /*erts_printf("Trap %T!\r\n",BIF_ARG_2);*/
    ASSERT(is_binary(BIF_ARG_1));
    ASSERT(*binary_val(BIF_ARG_1) == HEADER_PROC_BIN);
    pos = (int) binary_size(BIF_ARG_1);
    bytes = binary_bytes(BIF_ARG_1);
    sleft = left = allowed_iterations(BIF_P);
    err = 0;
    if (BIF_ARG_3 == am_latin1) {
	latin1 = 1;
    } 
    rest_term = do_build_utf8(BIF_P, BIF_ARG_2, &left, latin1,
			      bytes, &pos, &characters, &err, leftover, &num_leftovers); 
    /* Pay for the iterations actually used */
    cost_to_proc(BIF_P, sleft - left);
    return build_utf8_return(BIF_P,BIF_ARG_1,pos,rest_term,err,
			      leftover,num_leftovers,BIF_ARG_3);
}

/*
 * unicode:bin_is_7bit/1
 * Returns true if the argument is a binary of whole bytes in which no
 * byte has its high bit set, otherwise false (also for non-binaries).
 */
BIF_RETTYPE unicode_bin_is_7bit_1(BIF_ALIST_1)
{
    Sint need;

    if (is_binary(BIF_ARG_1)) {
	/* The UTF-8 size of a latin1 binary equals its own size only
	   if every byte is below 0x80 */
	need = latin1_binary_need(BIF_ARG_1);
	if (need >= 0 && aligned_binary_size(BIF_ARG_1) == need) {
	    BIF_RET(am_true);
	}
    }
    BIF_RET(am_false);
}

/*
 * Return non-zero if the binary 'orig_bin' consists of whole bytes
 * that analyze_utf8() accepts as complete, valid UTF-8; zero otherwise.
 * The whole binary is analyzed in one go (no iteration limit).
 */
static int is_valid_utf8(Eterm orig_bin)
{
    Uint bitoffs;
    Uint bitsize;
    byte *bytes;
    byte *temp_alloc = NULL;
    byte *end_of_valid;     /* output of analyze_utf8, not used */
    Uint num_chars;         /* output of analyze_utf8, not used */
    int status;

    ERTS_GET_BINARY_BYTES(orig_bin, bytes, bitoffs, bitsize);
    if (bitsize != 0) {
	return 0;
    }
    if (bitoffs != 0) {
	bytes = erts_get_aligned_binary_bytes(orig_bin, &temp_alloc);
    }
    status = analyze_utf8(bytes, binary_size(orig_bin),
			  &end_of_valid, &num_chars, NULL);
    erts_free_aligned_binary_bytes(temp_alloc);
    return (status == UTF8_OK);
}

/*
 * unicode:characters_to_binary/2
 *
 * BIF_ARG_1: the data to convert (binary, or list of integers, binaries
 *            and lists).
 * BIF_ARG_2: the encoding of binaries in the input. latin1, unicode and
 *            utf8 are handled here; for anything else the BIF traps to
 *            unicode:characters_to_binary_int/2.
 *
 * Fails with badarg if utf8_need() rejects the input. Otherwise the
 * result is a UTF-8 binary or an {error, ...}/{incomplete, ...} tuple,
 * as built by build_utf8_return(), which may also trap to continue the
 * work when the iteration budget is used up.
 */
BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2)
{
    Sint need;
    Uint characters = 0; /* Incremented by do_build_utf8(), so it must be
			    initialized even though the count is not
			    used here */
    int latin1;
    Eterm bin;
    byte *bytes;
    int pos;
    int err;
    int left, sleft;
    Eterm rest_term, subject;
    byte leftover[4]; /* used for temp buffer too, 
			 otherwise 3 bytes would have been enough */
    int num_leftovers = 0;
    Uint cost_of_utf8_need;


    if (BIF_ARG_2 == am_latin1) {
	latin1 = 1;
    } else if (BIF_ARG_2 == am_unicode || BIF_ARG_2 == am_utf8) {
	latin1 = 0;
    } else {
	BIF_TRAP2(c_to_b_int_trap_exportp, BIF_P, BIF_ARG_1, BIF_ARG_2);
    }	
    /* A list holding a single binary is treated as that binary */
    if (is_list(BIF_ARG_1) && is_binary(CAR(list_val(BIF_ARG_1))) && 
	is_nil(CDR(list_val(BIF_ARG_1)))) {
	subject = CAR(list_val(BIF_ARG_1));
    } else {
	subject = BIF_ARG_1;
    }

    need = utf8_need(subject,latin1,&cost_of_utf8_need);
    if (need < 0) {
	BIF_ERROR(BIF_P,BADARG);
    }
    /* Fast path: a binary that needs no conversion (7 bit latin1, or
       valid UTF-8 already) is returned as is */
    if (is_binary(subject) && aligned_binary_size(subject) == need
	&& (latin1 || is_valid_utf8(subject))) {
	cost_to_proc(BIF_P, simple_loops_to_common(cost_of_utf8_need)); 
	BIF_RET(subject);
    }
	

    bin = erts_new_mso_binary(BIF_P, (byte *)NULL, need);
    bytes = binary_bytes(bin);
    /* Pay for the size calculation and reduce the budget accordingly */
    cost_to_proc(BIF_P, simple_loops_to_common(cost_of_utf8_need)); 
    left = allowed_iterations(BIF_P) - 
	simple_loops_to_common(cost_of_utf8_need);
    if (left <= 0) {
	/* simplified - let everything be setup by setting left to 1 */
	left = 1;
    }
    sleft = left;
    pos = 0;
    err = 0;


    rest_term = do_build_utf8(BIF_P, subject, &left, latin1,
			      bytes, &pos, &characters, &err, leftover, &num_leftovers); 
#ifdef HARDDEBUG
    if (left == 0) {
	Eterm dbg_bin;
	if (is_binary(subject)) {
	    dbg_bin = subject;
	} else if(is_list(subject) && is_binary(CAR(list_val(subject)))) {
	    dbg_bin = CAR(list_val(subject));
	} else {
	    dbg_bin = NIL;
	}
	if (is_binary(dbg_bin)) {
	    byte *t = NULL;
	    Uint sz = binary_size(dbg_bin);
	    byte *by = erts_get_aligned_binary_bytes(dbg_bin,&t);
	    int i;
	    erts_printf("<<");
	    for (i = 0;i < sz; ++i) {
		erts_printf((i == sz -1) ? "0x%X" : "0x%X, ", (unsigned) by[i]);
	    }
	    erts_printf(">>: ");
	    erts_free_aligned_binary_bytes(t);
	}
	erts_printf("%d - %d = %d\n",sleft,left,sleft - left);
    }
#endif
    /* Pay for the iterations actually used */
    cost_to_proc(BIF_P, sleft - left); 
    return build_utf8_return(BIF_P,bin,pos,rest_term,err,
			     leftover,num_leftovers,BIF_ARG_2);
}

/*
 * Build the return value (or set up a trap) for characters_to_list
 * after a call to do_build_utf8(). 'bytes' holds 'pos' bytes of UTF-8
 * representing 'characters' characters.
 *
 *  - err != 0:   finalize with state UTF8_ERROR and the rest term
 *                (pending leftover bytes put first as a binary).
 *  - nothing left to process but leftover bytes exist: finalize with
 *                the leftover binary as rest and state UTF8_INCOMPLETE
 *                or UTF8_ERROR depending on check_leftovers().
 *  - rest_term is not NIL: save the buffer in a magic binary, consume
 *                all reductions and trap to characters_to_list_trap_1.
 *  - otherwise:  finalize with state UTF8_OK.
 *
 * A 'left' of zero or less is passed on as 1.
 */
static BIF_RETTYPE build_list_return(Process *p, byte *bytes, int pos, Uint characters,
				     Eterm rest_term, int err,
				     byte *leftover, int num_leftovers,
				     Eterm latin1, int left)
{
    Eterm *hp;
    Eterm leftover_bin;
    RestartContext rc;

    if (left <= 0) {
	left = 1;
    }

    if (err) {
	if (num_leftovers > 0) {
	    leftover_bin = new_binary(p, leftover, num_leftovers);
	    hp = HAlloc(p, 4);
	    rest_term = CONS(hp, rest_term, NIL);
	    hp += 2;
	    rest_term = CONS(hp, leftover_bin, rest_term);
	}
	BIF_RET(finalize_list_to_list(p, bytes, rest_term, 0U, pos,
				      characters, UTF8_ERROR, left, NIL));
    }

    if (rest_term == NIL) {
	if (num_leftovers == 0) {
	    /* Success */
	    BIF_RET(finalize_list_to_list(p, bytes, NIL, 0U, pos,
					  characters, UTF8_OK, left, NIL));
	}
	leftover_bin = new_binary(p, leftover, num_leftovers);
	if (check_leftovers(leftover, num_leftovers) != 0) {
	    BIF_RET(finalize_list_to_list(p, bytes, leftover_bin, 0U, pos,
					  characters, UTF8_ERROR,
					  left, NIL));
	}
	BIF_RET(finalize_list_to_list(p, bytes, leftover_bin, 0U, pos,
				      characters, UTF8_INCOMPLETE,
				      left, NIL));
    }

    /* More to do, trap */
    if (num_leftovers > 0) {
	Eterm rest_bin = new_binary(p, leftover, num_leftovers);
	hp = HAlloc(p, 2);
	rest_term = CONS(hp, rest_bin, rest_term);
    }
    BUMP_ALL_REDS(p);
    rc.bytes = bytes;
    rc.num_processed_bytes = 0; /* not used */
    rc.num_bytes_to_process = pos;
    rc.num_resulting_chars = characters;
    rc.state = UTF8_OK; /* not used */
    BIF_TRAP3(&characters_to_list_trap_1_exp, p, make_magic_bin_for_restart(p,&rc), 
	      rest_term, latin1);
}

/*
 * Trap target for characters_to_list: continue building the
 * intermediate UTF-8 buffer.
 *
 * BIF_ARG_1: magic binary holding the RestartContext.
 * BIF_ARG_2: the part of the input that remains to be processed.
 * BIF_ARG_3: the encoding atom of the input (latin1 or other).
 */
static BIF_RETTYPE characters_to_list_trap_1(BIF_ALIST_3)
{
    RestartContext *rc = get_rc_from_bin(BIF_ARG_1);
    byte *bytes = rc->bytes;
    int pos = rc->num_bytes_to_process;
    Uint characters = rc->num_resulting_chars;
    int latin1 = (BIF_ARG_3 == am_latin1) ? 1 : 0;
    byte leftover[4]; /* used for temp buffer too, 
			 otherwise 3 bytes would have been enough */
    int num_leftovers = 0;
    int err = 0;
    int budget;
    int left;
    Eterm rest_term;

    /* The buffer is taken over by this call; clear the pointer in the
       context to avoid free due to later GC */
    rc->bytes = NULL;

    budget = left = allowed_iterations(BIF_P);
    rest_term = do_build_utf8(BIF_P, BIF_ARG_2, &left, latin1,
			      bytes, &pos, &characters, &err,
			      leftover, &num_leftovers);
    cost_to_proc(BIF_P, budget - left);
    return build_list_return(BIF_P, bytes, pos, characters, rest_term, err,
			     leftover, num_leftovers, BIF_ARG_3, left);
}

/*
 * unicode:characters_to_list/2
 *
 * BIF_ARG_1: the data to convert (binary, or list of integers, binaries
 *            and lists).
 * BIF_ARG_2: the encoding of binaries in the input. latin1, unicode and
 *            utf8 are handled here; for anything else the BIF traps to
 *            unicode:characters_to_list_int/2.
 *
 * A plain binary in unicode/utf8 encoding is handed to utf8_to_list().
 * Everything else is first encoded as UTF-8 into a temporary buffer,
 * which build_list_return() then turns into the result (possibly via
 * traps). Fails with badarg if utf8_need() rejects the input.
 */
BIF_RETTYPE unicode_characters_to_list_2(BIF_ALIST_2)
{
    Sint need;
    int latin1;
    Uint characters = 0;
    byte *bytes;
    int pos = 0;
    int err = 0;
    int left, sleft;
    int counting_cost;
    Eterm rest_term;
    byte leftover[4]; /* used for temp buffer too, 
			 otherwise 3 bytes would have been enough */
    int num_leftovers = 0;
    Uint cost_of_utf8_need;

    latin1 = (BIF_ARG_2 == am_latin1);
    if (!latin1 && BIF_ARG_2 != am_unicode && BIF_ARG_2 != am_utf8) {
	BIF_TRAP2(c_to_l_int_trap_exportp, BIF_P, BIF_ARG_1, BIF_ARG_2);
    }
    if (!latin1 && is_binary(BIF_ARG_1)) {
	/* Optimized behaviour for this case */
	return utf8_to_list(BIF_P,BIF_ARG_1);
    }

    need = utf8_need(BIF_ARG_1, latin1, &cost_of_utf8_need);
    if (need < 0) {
	BIF_ERROR(BIF_P,BADARG);
    }
    bytes = alloc_restart(need);

    /* Pay for the size calculation and reduce the budget accordingly */
    counting_cost = simple_loops_to_common(cost_of_utf8_need);
    cost_to_proc(BIF_P, counting_cost);
    left = allowed_iterations(BIF_P) - counting_cost;
    if (left <= 0) {
	/* simplified - let everything be setup by setting left to 1 */
	left = 1;
    }
    sleft = left;

    rest_term = do_build_utf8(BIF_P, BIF_ARG_1, &left, latin1,
			      bytes, &pos, &characters, &err,
			      leftover, &num_leftovers);
    cost_to_proc(BIF_P, sleft - left);
    return build_list_return(BIF_P, bytes, pos, characters, rest_term, err,
			     leftover, num_leftovers, BIF_ARG_2, left);
}


/*
 * When input to characters_to_list is a plain binary and the format is
 * 'unicode', this function does a faster analysis and character count.
 *
 * source/size: the bytes to analyze.
 * err_pos:     receives a pointer to the first byte after the last
 *              accepted character (i.e. to the faulty or incomplete
 *              sequence if there is one).
 * num_chars:   receives the number of accepted characters.
 * left:        optional (may be NULL) in/out character budget; it is
 *              decremented per character and the analysis stops when
 *              it reaches zero.
 *
 * Returns UTF8_OK if all of the input was accepted, UTF8_INCOMPLETE if
 * it ends in the middle of a sequence, UTF8_ERROR on an invalid
 * sequence and UTF8_ANALYZE_MORE if the budget ran out.
 */
static int analyze_utf8(byte *source, Uint size, 
			byte **err_pos, Uint *num_chars, int *left)
{
    *err_pos = source;
    *num_chars = 0;
    while (size) {
	byte lead = source[0];
	Uint seq_len;

	if ((lead & 0x80) == 0) {
	    seq_len = 1;
	} else if ((lead & 0xE0) == 0xC0) {
	    if (size < 2) {
		return UTF8_INCOMPLETE;
	    }
	    if ((source[1] & 0xC0) != 0x80) {
		return UTF8_ERROR;
	    }
	    if (lead < 0xC2) { /* overlong */
		return UTF8_ERROR;
	    }
	    seq_len = 2;
	} else if ((lead & 0xF0) == 0xE0) {
	    if (size < 3) {
		return UTF8_INCOMPLETE;
	    }
	    if ((source[1] & 0xC0) != 0x80 ||
		(source[2] & 0xC0) != 0x80) {
		return UTF8_ERROR;
	    }
	    if (lead == 0xE0 && source[1] < 0xA0) { /* overlong */
		return UTF8_ERROR;
	    }
	    /* 0xD800..0xDFFF is not allowed */
	    if ((lead & 0x0F) == 0x0D && (source[1] & 0x20) != 0) {
		return UTF8_ERROR;
	    }
	    /* 0xFFFE and 0xFFFF are not allowed */
	    if (lead == 0xEF && source[1] == 0xBF &&
		(source[2] == 0xBE || source[2] == 0xBF)) {
		return UTF8_ERROR;
	    }
	    seq_len = 3;
	} else if ((lead & 0xF8) == 0xF0) {
	    if (size < 4) {
		return UTF8_INCOMPLETE;
	    }
	    if ((source[1] & 0xC0) != 0x80 ||
		(source[2] & 0xC0) != 0x80 ||
		(source[3] & 0xC0) != 0x80) {
		return UTF8_ERROR;
	    }
	    if (lead == 0xF0 && source[1] < 0x90) { /* overlong */
		return UTF8_ERROR;
	    }
	    /* Nothing above 0x10FFFF is allowed */
	    if ((lead & 0x07) > 0x04 ||
		((lead & 0x07) == 0x04 && (source[1] & 0x3F) > 0x0F)) {
		return UTF8_ERROR;
	    }
	    seq_len = 4;
	} else {
	    return UTF8_ERROR;
	}

	source += seq_len;
	size -= seq_len;
	++(*num_chars);
	*err_pos = source;
	if (left != NULL && --(*left) <= 0) {
	    return UTF8_ANALYZE_MORE;
	}
    }
    return UTF8_OK;
}

/*
 * Build list cells from the tail end of an utf8 buffer. No errors should
 * be able to occur here - the data is expected to contain no overlong,
 * malformed or truncated sequences.
 *
 * Of the 'num' characters asked for, at most 'left' (but never fewer
 * than one) are consed onto 'tail', walking backwards from bytes + sz.
 * *num_built receives the number of characters scheduled for this call
 * and *num_eaten the number of bytes consumed from the end of the buffer.
 */
static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, 
			     Uint left,
			     Uint *num_built, Uint *num_eaten, Eterm tail)
{
    byte *end = bytes + sz;
    byte *cur;
    Eterm acc = tail;
    Eterm *hp;
    Uint todo = num;

    ASSERT(num > 0);
    if (left < todo) {
	todo = (left > 0) ? left : 1;
    }

    *num_built = todo; /* Always */
    hp = HAlloc(p, todo * 2);

    for (cur = end - 1; cur >= bytes; --cur) {
	byte lead = *cur;
	Uint unipoint;

	if ((lead & 0x80) == 0) {
	    unipoint = (Uint) lead;
	} else if ((lead & 0xE0) == 0xC0) {
	    unipoint = (((Uint) (lead & 0x1F)) << 6)
		| ((Uint) (cur[1] & 0x3F));
	} else if ((lead & 0xF0) == 0xE0) {
	    unipoint = (((Uint) (lead & 0xF)) << 12)
		| (((Uint) (cur[1] & 0x3F)) << 6)
		| ((Uint) (cur[2] & 0x3F));
	} else if ((lead & 0xF8) == 0xF0) {
	    unipoint = (((Uint) (lead & 0x7)) << 18)
		| (((Uint) (cur[1] & 0x3F)) << 12)
		| (((Uint) (cur[2] & 0x3F)) << 6)
		| ((Uint) (cur[3] & 0x3F));
	} else {
	    /* 2#10XXXXXX, a continuation byte - keep backing up */
	    continue;
	}

	acc = CONS(hp, make_small(unipoint), acc);
	hp += 2;
	if (--todo == 0) {
	    break;
	}
    }
    *num_eaten = (end - cur);
    return acc;
}

/*
 * The last step of characters_to_list: build a list from the buffer
 * 'bytes' (created in the same way as for characters_to_utf8). All sizes
 * are known in advance.
 *
 * When do_utf8_to_list() could not build all characters in one go, the
 * remaining work is described in a RestartContext, wrapped with
 * make_magic_bin_for_restart() and the BIF traps to
 * characters_to_list_trap_2 with 'rest' and the list built so far.
 * Otherwise the buffer is released with free_restart() and the result is
 * the list itself, or {incomplete, List, Rest} / {error, List, Rest}
 * depending on 'state'.
 */
static BIF_RETTYPE finalize_list_to_list(Process *p, 
					 byte *bytes,
					 Eterm rest,
					 Uint num_processed_bytes,
					 Uint num_bytes_to_process, 
					 Uint num_resulting_chars, 
					 int state, int left,
					 Eterm tail) 
{
    Eterm converted = tail;
    Eterm result;
    Eterm *hp;

    if (num_bytes_to_process != 0) {
	Uint built = 0; /* characters */
	Uint eaten = 0; /* bytes */

	converted = do_utf8_to_list(p, num_resulting_chars,
				    bytes, num_bytes_to_process,
				    left, &built, &eaten, tail);
	cost_to_proc(p, built);

	if (built != num_resulting_chars) { /* work left to do */
	    RestartContext rc;

	    rc.bytes = bytes;
	    rc.state = state;
	    rc.num_processed_bytes = num_processed_bytes + eaten;
	    rc.num_bytes_to_process = num_bytes_to_process - eaten;
	    rc.num_resulting_chars = num_resulting_chars - built;
	    BUMP_ALL_REDS(p);
	    BIF_TRAP3(&characters_to_list_trap_2_exp, p,
		      make_magic_bin_for_restart(p, &rc), rest, converted);
	}
    }

    /*
     * OK, no more trapping, let's get rid of the temporary array...
     */
    free_restart(bytes);

    switch (state) {
    case UTF8_INCOMPLETE:
	hp = HAlloc(p, 4);
	result = TUPLE3(hp, am_incomplete, converted, rest);
	break;
    case UTF8_ERROR:
	hp = HAlloc(p, 4);
	result = TUPLE3(hp, am_error, converted, rest);
	break;
    default:
	result = converted;
	break;
    }

    BIF_RET(result);
}
 
/*
 * Trap target used by finalize_list_to_list() when list building has to
 * be continued. BIF_ARG_1 carries the RestartContext (fetched with
 * get_rc_from_bin()), BIF_ARG_2 is the rest term and BIF_ARG_3 the part
 * of the list that has been built so far.
 */
static BIF_RETTYPE characters_to_list_trap_2(BIF_ALIST_3)
{
    RestartContext *ctx = get_rc_from_bin(BIF_ARG_1);
    byte *buffer = ctx->bytes;

    /* Detach the buffer from the context, we don't want it freed just yet */
    ctx->bytes = NULL;

    return finalize_list_to_list(BIF_P, buffer, BIF_ARG_2,
				 ctx->num_processed_bytes,
				 ctx->num_bytes_to_process,
				 ctx->num_resulting_chars,
				 ctx->state,
				 allowed_iterations(BIF_P),
				 BIF_ARG_3);
}


/*
 * Hooks into the process of decoding a binary depending on state.
 *
 * If state is UTF8_ANALYZE_MORE, num_bytes_to_process and
 * num_resulting_chars grow until the whole binary has been analyzed.
 * After that the bytes to process are eaten, lowering
 * num_bytes_to_process and num_resulting_chars while increasing
 * num_processed_bytes, until everything is built. In that stage the
 * state tells how to return (error, incomplete or ok).
 * Note that num_processed_bytes and num_bytes_to_process together make up
 * the length of the part of the binary to process, not necessarily the
 * length of the whole binary (if there are errors or an incomplete tail).
 *
 * Analyzing happens from the beginning of the binary towards the end,
 * while the result is built from the end of the analyzed/accepted part
 * towards the beginning.
 *
 * Parameters:
 *   orig_bin              the binary given to the BIF
 *   num_processed_bytes   bytes already turned into list cells
 *   num_bytes_to_process  analyzed and accepted bytes not yet built
 *   num_resulting_chars   characters not yet built
 *   state                 UTF8_ANALYZE_MORE, or the outcome of the analysis
 *   tail                  the part of the list built so far
 *
 * Returns the list, {incomplete, List, Rest} or {error, List, Rest}, or
 * traps to characters_to_list_trap_3 (more analyzing) or
 * characters_to_list_trap_4 (more building) when the iteration budget
 * for this call is used up.
 *
 * Note that this routine is *only* called when the original input was a
 * plain utf8 binary, otherwise the rest and the sizes are known in
 * advance, so finalize_list_to_list is used to build the resulting list
 * (no analyzing needed).
 */
static BIF_RETTYPE do_bif_utf8_to_list(Process *p, 
				       Eterm orig_bin,
				       Uint num_processed_bytes,
				       Uint num_bytes_to_process, 
				       Uint num_resulting_chars, 
				       int state,
				       Eterm tail) 
{
    int left;
    Uint bitoffs;
    Uint bitsize;
    Uint size;
    byte *bytes;
    Eterm converted = NIL;
    Eterm rest = NIL;
    Eterm *hp;
    Eterm ret;
    byte *temp_alloc = NULL;
    byte *endpos;
    Uint numchar;

    Uint b_sz; /* size of the non analyzed tail */
    Uint num_built; /* characters */
    Uint num_eaten; /* bytes */

    ERTS_GET_BINARY_BYTES(orig_bin, bytes, bitoffs, bitsize);
    if (bitsize != 0) {
	/* Not a whole number of bytes: everything is returned as rest */
	converted = NIL;
	rest = orig_bin;
	goto error_return;
    }
    if (bitoffs != 0) {
	/* Data is not byte aligned, work on an aligned copy instead */
	bytes = erts_get_aligned_binary_bytes(orig_bin, &temp_alloc);
    }
    
    size = binary_size(orig_bin);

    left = allowed_iterations(p);
    
    if (state == UTF8_ANALYZE_MORE) {
	/* Continue analyzing after the part that is already accepted */
	state = analyze_utf8(bytes + num_bytes_to_process,
			     size - num_bytes_to_process,
			     &endpos,&numchar,&left);
	cost_to_proc(p,numchar);
	num_resulting_chars += numchar;
	num_bytes_to_process = endpos - bytes;
	if (state == UTF8_ANALYZE_MORE) {
	    /* Out of iterations, remember how far we got and trap */
	    Eterm epos = erts_make_integer(num_bytes_to_process,p);
	    Eterm enumchar = erts_make_integer(num_resulting_chars,p);
	    erts_free_aligned_binary_bytes(temp_alloc);
	    BUMP_ALL_REDS(p);
	    BIF_TRAP3(&characters_to_list_trap_3_exp, p, orig_bin, epos, 
		      enumchar);
	}
    } 

    /* 
     * If we're here, we have everything analyzed and are instead building 
     */


    if (!num_bytes_to_process) {
	converted = tail;
    } else {
	num_built = 0;
	num_eaten = 0;
	converted = do_utf8_to_list(p, num_resulting_chars,
				    bytes, num_bytes_to_process,
				    left, &num_built, &num_eaten, tail);
	cost_to_proc(p,num_built);
	
	if (num_built != num_resulting_chars) { /* work left to do */
	    Eterm newnum_resulting_chars = 
		erts_make_integer(num_resulting_chars - num_built,p);
	    Eterm newnum_bytes_to_process = 
		erts_make_integer(num_bytes_to_process - num_eaten,p);
	    Eterm newnum_processed_bytes = 
		erts_make_integer(num_processed_bytes + num_eaten,p);
	    Eterm traptuple;
	    hp = HAlloc(p,7);
	    traptuple = TUPLE6(hp,orig_bin,newnum_processed_bytes,
			       newnum_bytes_to_process, 
			       newnum_resulting_chars,
			       make_small(state),
			       converted);
	    BUMP_ALL_REDS(p);
	    erts_free_aligned_binary_bytes(temp_alloc);
	    BIF_TRAP1(&characters_to_list_trap_4_exp,p,traptuple);
	}
    }

    /* 
     * OK, no more trapping, let's build rest binary if there should
     * be one. 
     */

    b_sz = size - (num_bytes_to_process + num_processed_bytes);

    if (b_sz) {
	ErlSubBin *sb;
	Eterm orig;
	Uint offset;
	ASSERT(state != UTF8_OK);
	hp = HAlloc(p, ERL_SUB_BIN_SIZE);
	sb = (ErlSubBin *) hp;
	ERTS_GET_REAL_BIN(orig_bin, orig, offset, bitoffs, bitsize);
	sb->thing_word = HEADER_SUB_BIN;
	sb->size = b_sz;
	/*
	 * The new sub binary refers to the real binary 'orig', so the
	 * byte offset of orig_bin within it has to be included. The
	 * byte counts alone are relative to the start of orig_bin.
	 */
	sb->offs = offset + num_bytes_to_process + num_processed_bytes;
	sb->orig = orig;
	sb->bitoffs = bitoffs;
	sb->bitsize = bitsize;
	sb->is_writable = 0;
	rest = make_binary(sb);
    } 

    /* Done */

    if (state == UTF8_INCOMPLETE) {
	/* A tail rejected by check_leftovers() is reported as an error */
	if (check_leftovers(bytes + num_bytes_to_process + num_processed_bytes,
			    b_sz) != 0) {
	    goto error_return;
	}
	hp = HAlloc(p,4);
	ret = TUPLE3(hp,am_incomplete,converted,rest);
    } else if (state == UTF8_ERROR) {
 error_return:
	hp = HAlloc(p,4);
	ret = TUPLE3(hp,am_error,converted,rest);
    } else {
	ret = converted;
    }

    erts_free_aligned_binary_bytes(temp_alloc);
    BIF_RET(ret);
}


/*
 * Trap target used while there is still analyzing left to do; we only
 * reach this if the original input was a binary.
 *
 * BIF_ARG_1 is the binary, BIF_ARG_2 the number of already analyzed and
 * accepted bytes and BIF_ARG_3 the number of characters produced by the
 * already analyzed part of the binary.
 */
static BIF_RETTYPE characters_to_list_trap_3(BIF_ALIST_3)
{
    Uint analyzed_bytes;
    Uint analyzed_chars;

    term_to_Uint(BIF_ARG_2, &analyzed_bytes);
    term_to_Uint(BIF_ARG_3, &analyzed_chars);

    return do_bif_utf8_to_list(BIF_P,
			       BIF_ARG_1,         /* the binary */
			       0U,                /* nothing processed yet */
			       analyzed_bytes,
			       analyzed_chars,
			       UTF8_ANALYZE_MORE, /* always this state here */
			       NIL);              /* nothing built -> no tail yet */
}

/*
 * Trap target used when analyzing is done and we were interrupted while
 * building; we only reach this if the original input was a binary.
 *
 * BIF_ARG_1 is the tuple
 *   {Binary, ProcessedBytes, BytesToProcess, CharsLeft, State, Tail}
 * put together by do_bif_utf8_to_list().
 */
static BIF_RETTYPE characters_to_list_trap_4(BIF_ALIST_1)
{
    Eterm *elems = tuple_val(BIF_ARG_1);
    Eterm whole_bin = elems[1];
    Eterm built_tail = elems[6];
    int saved_state = (int) signed_val(elems[5]);
    Uint processed;
    Uint pending;
    Uint chars_left;

    term_to_Uint(elems[2], &processed);
    term_to_Uint(elems[3], &pending);
    term_to_Uint(elems[4], &chars_left);

    return do_bif_utf8_to_list(BIF_P,
			       whole_bin,   /* the whole binary */
			       processed,   /* bytes already processed */
			       pending,     /* bytes left to process */
			       chars_left,  /* chars left to build */
			       saved_state, /* never ANALYZE_MORE here */
			       built_tail); /* the already built tail */
}
/*
 * Entry point used only when the characters are a plain unicode (utf8)
 * binary. Instead of building an utf8 buffer, the given binary is
 * analyzed and used directly. Fails with badarg unless the argument is a
 * binary for which aligned_binary_size() is non-negative.
 */
static BIF_RETTYPE utf8_to_list(BIF_ALIST_1)
{
    if (!is_binary(BIF_ARG_1)) {
	BIF_ERROR(BIF_P, BADARG);
    }
    if (aligned_binary_size(BIF_ARG_1) < 0) {
	BIF_ERROR(BIF_P, BADARG);
    }

    /* Nothing analyzed, nothing built: start out in the analyze state */
    return do_bif_utf8_to_list(BIF_P, BIF_ARG_1, 0U, 0U, 0U,
			       UTF8_ANALYZE_MORE, NIL);
}


/*
 * erlang:atom_to_binary/2.
 *
 * BIF_ARG_1 is the atom and BIF_ARG_2 the wanted encoding (latin1, utf8
 * or unicode). With latin1 the bytes of the atom name are copied as they
 * are. With utf8/unicode every byte >= 16#80 is expanded to a two byte
 * sequence. Fails with badarg if BIF_ARG_1 is not an atom or the
 * encoding is not one of the three above.
 */
BIF_RETTYPE atom_to_binary_2(BIF_ALIST_2)
{
    Atom *atom;
    Eterm result;
    byte *out;
    int utf8_len = 0;
    int idx;

    if (is_not_atom(BIF_ARG_1)) {
	BIF_ERROR(BIF_P, BADARG);
    }
    atom = atom_tab(atom_val(BIF_ARG_1));

    if (BIF_ARG_2 == am_latin1) {
	BIF_RET(new_binary(BIF_P, atom->name, atom->len));
    }
    if (BIF_ARG_2 != am_utf8 && BIF_ARG_2 != am_unicode) {
	BIF_ERROR(BIF_P, BADARG);
    }

    /* One byte per character below 16#80, two bytes for the others */
    for (idx = 0; idx < atom->len; idx++) {
	if (atom->name[idx] >= 0x80) {
	    utf8_len += 2;
	} else {
	    utf8_len += 1;
	}
    }
    if (utf8_len == atom->len) {
	/* Nothing needs expanding, the name can be copied as is */
	BIF_RET(new_binary(BIF_P, atom->name, atom->len));
    }

    result = new_binary(BIF_P, 0, utf8_len);
    out = binary_bytes(result);
    for (idx = 0; idx < atom->len; idx++) {
	byte ch = atom->name[idx];

	if (ch >= 0x80) {
	    *out++ = 0xC0 | (ch >> 6);
	    *out++ = 0x80 | (ch & 0x3F);
	} else {
	    *out++ = ch;
	}
    }
    BIF_RET(result);
}

/*
 * Common implementation of binary_to_atom/2 and
 * binary_to_existing_atom/2.
 *
 *   bin         the binary holding the atom name
 *   enc         latin1, utf8 or unicode
 *   must_exist  when zero the atom is created with am_atom_put(),
 *               otherwise it is looked up with erts_atom_get() and a
 *               miss gives badarg
 *
 * Fails with badarg for anything erts_get_aligned_binary_bytes() rejects,
 * for an unknown encoding, and for utf8 input that is malformed or holds
 * code points above 255. Fails with system_limit when the name is longer
 * than MAX_ATOM_LENGTH characters.
 *
 * The buffer that erts_get_aligned_binary_bytes() may hand back through
 * temp_alloc is released on every way out of the function, on the
 * successful returns as well as on the error paths.
 */
static BIF_RETTYPE
binary_to_atom(Process* p, Eterm bin, Eterm enc, int must_exist)
{
    byte* bytes;
    byte *temp_alloc = NULL;
    Uint bin_size;

    if ((bytes = erts_get_aligned_binary_bytes(bin, &temp_alloc)) == 0) {
	BIF_ERROR(p, BADARG);
    }
    bin_size = binary_size(bin);
    if (enc == am_latin1) {
	Eterm a;
	if (bin_size > MAX_ATOM_LENGTH) {
	system_limit:
	    erts_free_aligned_binary_bytes(temp_alloc);
	    BIF_ERROR(p, SYSTEM_LIMIT);
	}
	if (!must_exist) {
	    /* 'bytes' may be the temporary copy, so free it after use */
	    a = am_atom_put((char *)bytes, bin_size);
	    erts_free_aligned_binary_bytes(temp_alloc);
	    BIF_RET(a);
	} else if (erts_atom_get((char *)bytes, bin_size, &a)) {
	    erts_free_aligned_binary_bytes(temp_alloc);
	    BIF_RET(a);
	} else {
	    goto badarg;
	}
    } else if (enc == am_utf8 || enc == am_unicode) {
	char *buf;
	char *dst;
	int i;
	int num_chars;
	Eterm res;

	if (bin_size > 2*MAX_ATOM_LENGTH) {
	    byte* err_pos;
	    Uint n;
	    int reds_left = bin_size+1; /* Number of reductions left. */

	    if (analyze_utf8(bytes, bin_size, &err_pos,
			     &n, &reds_left) == UTF8_OK) {
		/* 
		 * Correct UTF-8 encoding, but too many characters to
		 * fit in an atom.
		 */
		goto system_limit;
	    } else {
		/*
		 * Something wrong in the UTF-8 encoding or Unicode code
		 * points > 255.
		 */
		goto badarg;
	    }
	}

	/*
	 * Allocate a temporary buffer the same size as the binary,
	 * so that we don't need an extra overflow test.
	 */
	buf = (char *) erts_alloc(ERTS_ALC_T_TMP, bin_size);
	dst = buf;
	for (i = 0; i < bin_size; i++) {
	    int c = bytes[i];
	    if (c < 0x80) {
		*dst++ = c;
	    } else if (i < bin_size-1) {
		int c2;
		/* Only two byte sequences can encode code points <= 255 */
		if ((c & 0xE0) != 0xC0) {
		    goto free_badarg;
		}
		i++;
		c = (c & 0x3F) << 6;
		c2 = bytes[i];
		if ((c2 & 0xC0) != 0x80) {
		    goto free_badarg;
		}
		c = c | (c2 & 0x3F);
		/* Rejects overlong forms (< 16#80) and code points > 255 */
		if (0x80 <= c && c < 256) {
		    *dst++ = c;
		} else {
		    goto free_badarg;
		}
	    } else {
	    free_badarg:
		erts_free(ERTS_ALC_T_TMP, (void *) buf);
		goto badarg;
	    }
	}
	num_chars = dst - buf;
	if (num_chars > MAX_ATOM_LENGTH) {
	    erts_free(ERTS_ALC_T_TMP, (void *) buf);
	    goto system_limit;
	}
	if (!must_exist) {
	    res = am_atom_put(buf, num_chars);
	    erts_free(ERTS_ALC_T_TMP, (void *) buf);
	    erts_free_aligned_binary_bytes(temp_alloc);
	    BIF_RET(res);
	} else {
	    int exists = erts_atom_get(buf, num_chars, &res);
	    erts_free(ERTS_ALC_T_TMP, (void *) buf);
	    if (exists) {
		erts_free_aligned_binary_bytes(temp_alloc);
		BIF_RET(res);
	    } else {
		goto badarg;
	    }
	}
    } else {
    badarg:
	erts_free_aligned_binary_bytes(temp_alloc);
	BIF_ERROR(p, BADARG);
    }
}

/*
 * erlang:binary_to_atom/2: BIF_ARG_1 is the binary, BIF_ARG_2 the
 * encoding. The atom does not have to exist beforehand.
 */
BIF_RETTYPE binary_to_atom_2(BIF_ALIST_2)
{
    const int must_exist = 0;

    return binary_to_atom(BIF_P, BIF_ARG_1, BIF_ARG_2, must_exist);
}

/*
 * erlang:binary_to_existing_atom/2: BIF_ARG_1 is the binary, BIF_ARG_2
 * the encoding. The atom has to exist already.
 */
BIF_RETTYPE binary_to_existing_atom_2(BIF_ALIST_2)
{
    const int must_exist = 1;

    return binary_to_atom(BIF_P, BIF_ARG_1, BIF_ARG_2, must_exist);
}
