/**
 * @file   ngram_read_bin.c
 * 
 * <JA>
 * @brief  ХʥN-gramեɤ߹
 *
 * ХʥǤ 2-gram ȵո 3-gram 1ĤΥե
 * ƤޤХʥJuilusȼΤߤ򥵥ݡȤƤꡤ
 * ¾ΥХʥȸߴϤޤΤդƲ
 *
 * rev.3.5 ꡤХʥN-gramΥեΰѹޤ
 * Хȥ Big endian ꤫ޥ¸ѹ(إå
 * Ѵξ򵭽), ޤǥå 24bit 
 *  2-gram ΥХåեǡΰ̤Ԥޤ
 * ˤꡤ3.5 ʹߤ mkbingram ХʥN-gram,
 * 3.4.2 Julius ǤϻȤޤΤդƤ
 * (إååǥ顼Ȥʤ)
 *
 * ʤ 3.5 ʹߤ Julius ǤϽΥǥʤɤ롥ξ,
 * ǥå 24bit ȥХåդΰ̤ϥǥɤ߹߻
 * ٹԤ롥ޤХȥϥإå򸫤ŬѴΤǡ
 * ۤʤХȥΥޥ
 * ХʥN-gramǤʤɤ롥Υǥ⤽Τޤ
 * ɤ߹롥
 * 
 * </JA>
 * 
 * <EN>
 * @brief  Read binary format N-gram file
 *
 * In binary format, both 2-gram and reverse 3-gram are stored
 * together in one file.  This binary format is not
 * compatible with other binary format of language model.
 * 
 * From 3.5, internal format of binary N-gram has changed for using
 * machine-dependent natural byte order (previously fixed to big endian),
 * 24bit index and 2-gram backoff compression.  So, binary N-gram
 * generated by mkbingram of 3.5 and later will not work on 3.4.2 and
 * earlier versions.
 *
 * There is full upward- and cross-machine compatibility in 3.5.  Old
 * binary N-gram files can still be read directly, in which case the conversion
 * to 24bit index will be performed just after the model has been read.
 * Byte order is also determined from the header information, so
 * binary N-gram files can still be used among different machines.
 * </EN>
 * 
 * @author Akinobu LEE
 * @date   Wed Feb 16 17:12:08 2005
 *
 * $Revision: 1.6 $
 * 
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <sent/ngram2.h>

/* File-scope reader state.  file_version, need_swap and need_conv are
   assigned by check_header() while parsing the file header and are then
   consulted by the read functions in this file. */
static int file_version;  ///< N-gram format version of the file (3, 4 or 5; set by check_header())
static boolean need_swap; ///< TRUE if multi-byte units read from the file need byte swap
#ifdef WORDS_INT
static boolean need_conv;	///< TRUE if need conversion of word ID from 2 bytes to 4 bytes (also forced to TRUE by ngram_read_bin() when retrying)
static boolean words_int_retry = FALSE; ///< TRUE if retrying with conversion (set by ngram_read_bin_compat(), reset by ngram_read_bin())
#endif

/**
 * Read helpers that bail out of the calling function on failure.
 *
 * rdn() wraps rdnfunc() and rdn_wordid() wraps rdn_wordid_func(): each
 * performs the read and, if it reports FALSE, makes the *enclosing*
 * function return FALSE immediately.  They may therefore only be used
 * inside functions whose return type is boolean.
 *
 * The bodies are wrapped in do { } while (0) so that each use behaves as a
 * single statement (safe in unbraced if/else), and the arguments are
 * parenthesized to keep expression arguments intact.
 */
#define rdn(A,B,C,D) do { if (rdnfunc((A),(B),(C),(D)) == FALSE) return FALSE; } while (0)
#define rdn_wordid(A,B,C,D) do { if (rdn_wordid_func((A),(B),(C),(D)) == FALSE) return FALSE; } while (0)
/** 
 * Binary read function with byte swap
 * 
 * @param fp [in] file pointer
 * @param buf [out] data buffer
 * @param unitbyte [in] unit size in bytes
 * @param unitnum [in] number of unit to read.
 */
static boolean
rdnfunc(FILE *fp, void *buf, size_t unitbyte, size_t unitnum)
{
  size_t tmp;
  if ((tmp = myfread(buf, unitbyte, unitnum, fp)) < unitnum) {
    jlog("Error: ngram_read_bin: failed to read %d bytes\n", unitbyte*unitnum);
    return FALSE;
  }
  if (need_swap) {
    if (unitbyte != 1) {
      swap_bytes(buf, unitbyte, unitnum);
    }
  }
  return TRUE;
}

#ifdef WORDS_INT
/** 
 * Read an array of word IDs, with byte swap and optional widening.
 *
 * Without conversion the data is read directly as WORD_ID units.  With
 * conversion the file is assumed to hold unsigned short IDs: they are read
 * into the front of @a buf and then expanded in place to WORD_ID.  @a buf
 * must therefore always have room for @a unitnum WORD_ID values.
 * 
 * @param fp [in] file pointer
 * @param buf [out] data buffer
 * @param unitnum [in] number of unit to read.
 * @param need_conv [in] TRUE if need conversion from 2byte to 4byte
 *
 * @return TRUE on success, FALSE if the underlying read failed.
 */
static boolean
rdn_wordid_func(FILE *fp, void *buf, int unitnum, boolean need_conv)
{
  unsigned short *narrow;
  WORD_ID *wide;
  int idx;

  if (!need_conv) {
    /* stored IDs already have the in-memory size */
    rdn(fp, buf, sizeof(WORD_ID), unitnum);
    return TRUE;
  }

  /* the file holds unsigned short IDs: load them packed at the front */
  rdn(fp, buf, sizeof(unsigned short), unitnum);

  /* widen in place, walking from the last element down to the first so
     that a not-yet-converted short is never overwritten */
  narrow = (unsigned short *)buf;
  wide = (WORD_ID *)buf;
  idx = unitnum;
  while (idx-- > 0) {
    wide[idx] = (WORD_ID)narrow[idx];
  }
  return TRUE;
}
#endif

/** 
 * Read the file header and check whether the version matches.
 *
 * Reads BINGRAM_HDSIZE bytes from @a fp and parses, in order, the format id
 * string, the optional word-size string and (for version 4 and later) the
 * byte-order string.  As a result the file-scope variables file_version,
 * need_swap and (with WORDS_INT) need_conv are set for the readers that
 * follow.
 * 
 * @param fp [in] file pointer
 *
 * @return TRUE if the header was accepted, FALSE on read failure, unknown
 * id string, unsupported word size, or missing byte order information.
 */
static boolean
check_header(FILE *fp)
{
  char buf[BINGRAM_HDSIZE], *p;

  rdn(fp, buf, 1, BINGRAM_HDSIZE);
  p = buf;
#ifdef WORDS_INT
  need_conv = FALSE;
#endif
  /* need_swap is a file static: reset it so that a header which carries no
     byte order information cannot inherit the value left by a previous
     load */
  need_swap = FALSE;

  /* version check */
  if (strnmatch(p, BINGRAM_IDSTR, strlen(BINGRAM_IDSTR))) {
    /* bingram file made by mkbingram before 3.4.2 */
    file_version = 3;
    p += strlen(BINGRAM_IDSTR) + 1;
  } else if (strnmatch(p, BINGRAM_IDSTR_V4, strlen(BINGRAM_IDSTR_V4))) {
    /* bingram file made by mkbingram later than 3.5 */
    file_version = 4;
    p += strlen(BINGRAM_IDSTR_V4) + 1;
  } else if (strnmatch(p, BINGRAM_IDSTR_V5, strlen(BINGRAM_IDSTR_V5))) {
    /* bingram file made by JuliusLib-4 and later */
    file_version = 5;
    p += strlen(BINGRAM_IDSTR_V5) + 1;
  } else {
    /* not a bingram file */
    jlog("Error: ngram_read_bin: invalid header\n");
    return FALSE;
  }
  /* word size check (for bingram built by mkbingram 3.3p5 and later) */
  if (strnmatch(p, BINGRAM_SIZESTR_HEAD, strlen(BINGRAM_SIZESTR_HEAD))) {
    p += strlen(BINGRAM_SIZESTR_HEAD);
    if (! strnmatch(p, BINGRAM_SIZESTR_BODY, strlen(BINGRAM_SIZESTR_BODY))) {
      /* word size does not match (int / short) */
#ifdef WORDS_INT
      if (strnmatch(p, BINGRAM_SIZESTR_BODY_2BYTE, strlen(BINGRAM_SIZESTR_BODY_2BYTE))) {
        /* this is 2-byte word ID, will convert while reading */
        jlog("Warning: ngram_read_bin: 2-bytes bingram, converting to 4 bytes\n");
        need_conv = TRUE;
        p += strlen(BINGRAM_SIZESTR_BODY_2BYTE) + 1;
      } else {
        jlog("Error: ngram_read_bin: unknown word byte size!\n");
        return FALSE;
      }
#else
      if (strnmatch(p, BINGRAM_SIZESTR_BODY_4BYTE, strlen(BINGRAM_SIZESTR_BODY_4BYTE))) {
        /*** 4bytes to 2bytes not implemented, just terminate here... ***/
        jlog("Error: ngram_read_bin: cannot handle 4-bytes bingram\n");
        jlog("Error: ngram_read_bin: please use Julius compiled with --enable-words-int\n");
        return FALSE;
      } else {
        jlog("Error: ngram_read_bin: unknown word byte size!\n");
        return FALSE;
      }
#endif
    } else {
      p += strlen(BINGRAM_SIZESTR_BODY) + 1;
    }

    /* byte order check (v4 (rev.3.5) and later) */
    if (file_version >= 4) {
      if (!strnmatch(p, BINGRAM_BYTEORDER_HEAD, strlen(BINGRAM_BYTEORDER_HEAD))) {
        jlog("Error: ngram_read_bin: no information for byte order??\n");
        return FALSE;
      }
      p += strlen(BINGRAM_BYTEORDER_HEAD);
      if (! strnmatch(p, BINGRAM_NATURAL_BYTEORDER, strlen(BINGRAM_NATURAL_BYTEORDER))) {
        /* file endian and running endian is different, need swapping */
        need_swap = TRUE;
      } else {
        need_swap = FALSE;
      }
      p += strlen(BINGRAM_NATURAL_BYTEORDER) + 1;
    }
  } /* if no BINGRAM_SIZESTR_HEAD found, just pass it */

  /* in case of V3 bingram file, the unit size of word_id and its byte order
     cannot be determined from the header.  In that case, we assume 
     byteorder to be a BIG ENDIAN.  The word_id unit size (2byte in normal,
     or 4byte if bingram generated with mkbingram with --enable-words-int)
     will be automagically detected.
     */

  if (file_version < 4) {
    /* assume input as big endian */
#ifdef WORDS_BIGENDIAN
    need_swap = FALSE;
#else
    need_swap = TRUE;
#endif
  }

  return TRUE;
}

/**
 * Read the body of a version 5 binary N-gram file.
 *
 * Expects the header to have been consumed already by check_header().
 * Reads, in file order: N, direction and the bigram-index-reversed flag;
 * the number of entries for each N; the word name block; then for each
 * N-gram level its flags, index tables (levels above 1-gram only),
 * probabilities and the optional back-off weight / context id tables;
 * and finally the optional bo_wt_1 and p_2 arrays.
 *
 * Arrays are allocated here and stored into @a ndata.  Memory that has
 * already been attached to @a ndata is not released when an error occurs
 * part-way.
 *
 * @param fp [in] file pointer positioned just after the header
 * @param ndata [out] N-gram data to fill
 *
 * @return TRUE on success, FALSE on read failure or inconsistent data.
 */
static boolean
ngram_read_bin_v5(FILE *fp, NGRAM_INFO *ndata)
{
  int i,n,len;
  char *w, *p;
  NGRAM_TUPLE_INFO *t;

  /* read some info extended from version 5 */
  rdn(fp, &(ndata->n), sizeof(int), 1);
  rdn(fp, &(ndata->dir), sizeof(int), 1);
  rdn(fp, &(ndata->bigram_index_reversed), sizeof(boolean), 1);

  jlog("Stat: ngram_read_bin_v5: this is %s %d-gram file\n", (ndata->dir == DIR_LR) ? "forward" : "backward", ndata->n);

  if (ndata->n > MAX_N) {
    /* report the N read from the file (the loop counter is not set yet) */
    jlog("Error: ngram_read_bin_v5: too long N-gram (N=%d)\n", ndata->n);
    jlog("Error: ngram_read_bin_v5: current maximum length of N-gram is set to %d\n", MAX_N);
    jlog("Error: ngram_read_bin_v5: you can expand the limit by setting MAX_N in \"sent/ngram.h\"\n");
    return FALSE;
  }

  /* read total info and set max_word_num */
  for(n=0;n<ndata->n;n++) {
    rdn(fp, &(ndata->d[n].totalnum), sizeof(NNID), 1);
  }
  ndata->max_word_num = ndata->d[0].totalnum;

  /* read wname: one block of NUL-terminated strings, len bytes in total */
  rdn(fp, &len, sizeof(int), 1);
  if (len < 0) {
    jlog("Error: ngram_read_bin_v5: wname error??\n");
    return FALSE;
  }
  w = mymalloc(len);
  rdn(fp, w, 1, len);
  if (len > 0 && w[len - 1] != '\0') {
    /* last name is not terminated: scanning it would run past the block */
    jlog("Error: ngram_read_bin_v5: wname error??\n");
    free(w);
    return FALSE;
  }
  /* assign... */
  ndata->wname = (char **)mymalloc(sizeof(char *) * ndata->max_word_num);
  p = w; i = 0;
  while (p < w + len) {
    if (i >= ndata->max_word_num) {
      /* more names in the block than entries allocated in wname[] */
      jlog("Error: ngram_read_bin_v5: wname error??\n");
      return FALSE;
    }
    ndata->wname[i++] = p;
    while(*p != '\0') p++;
    p++;
  }
  if (i != ndata->max_word_num) {
    jlog("Error: ngram_read_bin_v5: wname error??\n");
    return FALSE;
  }

  /* read N-gram */
  for(n=0;n<ndata->n;n++) {
    jlog("stat: ngram_read_bin_v5: reading %d-gram\n", n+1);

    t = &(ndata->d[n]);
    
    rdn(fp, &(t->is24bit), sizeof(boolean), 1);
    rdn(fp, &(t->ct_compaction), sizeof(boolean), 1);
    rdn(fp, &(t->bgnlistlen), sizeof(NNID), 1);
    rdn(fp, &(t->context_num), sizeof(NNID), 1);

    if (n > 0) {
      /* index tables exist only above the 1-gram level */
      if (t->is24bit) {
        t->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), t->bgnlistlen);
        rdn(fp, t->bgn_upper, sizeof(NNID_UPPER), t->bgnlistlen);
        t->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), t->bgnlistlen);
        rdn(fp, t->bgn_lower, sizeof(NNID_LOWER), t->bgnlistlen);
      } else {
        t->bgn = (NNID *)mymalloc_big(sizeof(NNID), t->bgnlistlen);
        rdn(fp, t->bgn, sizeof(NNID), t->bgnlistlen);
      }
      t->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), t->bgnlistlen);
      rdn(fp, t->num, sizeof(WORD_ID), t->bgnlistlen);
      t->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), t->totalnum);
      rdn(fp, t->nnid2wid, sizeof(WORD_ID), t->totalnum);
    } else {
      t->bgn_upper = NULL;
      t->bgn_lower = NULL;
      t->bgn = NULL;
      t->num = NULL;
      t->bgnlistlen = 0;
      t->nnid2wid = NULL;
    }

    t->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->totalnum);
    rdn(fp, t->prob, sizeof(LOGPROB), t->totalnum);

    /* an int flag of 1 announces that back-off weights follow */
    rdn(fp, &i, sizeof(int), 1);
    if (i == 1) {
      t->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);
      rdn(fp, t->bo_wt, sizeof(LOGPROB), t->context_num);
    } else {
      t->bo_wt = NULL;
    }
    /* an int flag of 1 announces that the nnid -> context id tables follow */
    rdn(fp, &i, sizeof(int), 1);
    if (i == 1) {
      t->nnid2ctid_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), t->totalnum);
      t->nnid2ctid_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), t->totalnum);
      rdn(fp, t->nnid2ctid_upper, sizeof(NNID_UPPER), t->totalnum);
      rdn(fp, t->nnid2ctid_lower, sizeof(NNID_LOWER), t->totalnum);
    } else {
      t->nnid2ctid_upper = NULL;
      t->nnid2ctid_lower = NULL;
    }
  }
  /* optional bo_wt_1 array, sized by the 1-gram context count */
  rdn(fp, &i, sizeof(int), 1);
  if (i == 1) {
    ndata->bo_wt_1 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ndata->d[0].context_num);
    rdn(fp, ndata->bo_wt_1, sizeof(LOGPROB), ndata->d[0].context_num);
  } else {
    ndata->bo_wt_1 = NULL;
  }
  /* optional p_2 array, sized by the 2-gram entry count */
  rdn(fp, &i, sizeof(int), 1);
  if (i == 1) {
    ndata->p_2 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ndata->d[1].totalnum);
    rdn(fp, ndata->p_2, sizeof(LOGPROB), ndata->d[1].totalnum);
  } else {
    ndata->p_2 = NULL;
  }

  return TRUE;
}

/**
 * Read the body of an old-format (version 3 or 4) binary N-gram file.
 *
 * Expects the header to have been consumed already by check_header().
 * Old files always hold a backward 3-gram with a reversed 2-gram index, so
 * those properties are fixed here before reading.  For version 3 files the
 * 3-gram index is converted to the 24bit upper/lower representation after
 * reading when the 3-gram count fits, and the context compaction step is
 * then run through ngram_compact_context().
 *
 * With WORDS_INT, the 2-gram count table read for the 1-gram level is
 * sanity-checked: if a value exceeds the vocabulary size the file is taken
 * to hold 2-byte word IDs.  In that case the memory allocated so far is
 * released, the file is rewound, words_int_retry is set, @a retry_ret is
 * set to 1 and FALSE is returned so that the caller restarts the read with
 * conversion forced on.
 *
 * @param fp [in] file pointer positioned just after the header
 * @param ndata [out] N-gram data to fill
 * @param retry_ret [out] set to 1 when the caller should retry from the
 * top of the file; left untouched otherwise
 *
 * @return TRUE on success, FALSE on failure or when a retry is requested.
 */
static boolean
ngram_read_bin_compat(FILE *fp, NGRAM_INFO *ndata, int *retry_ret)
{
  int i,n,len;
  char *w, *p;
  NNID *n3_bgn = NULL;	/* temporary full-width 3-gram index (v3, 24bit case) */
  NNID d, ntmp;
  NGRAM_TUPLE_INFO *t, *tt, *ttt;

  /* old binary N-gram assumes these types */
  ndata->bigram_index_reversed = TRUE;
  ndata->n = 3;
  ndata->dir = DIR_RL;

  /* read total info and set max_word_num */
  for(n=0;n<ndata->n;n++) {
    rdn(fp, &(ndata->d[n].totalnum), sizeof(NNID), 1);
  }
  ndata->max_word_num = ndata->d[0].totalnum;

  if (file_version == 4) {
    rdn(fp, &(ndata->d[1].context_num), sizeof(NNID), 1);
  }

  for(n=0;n<ndata->n;n++) {
    if (n < 2) {
      ndata->d[n].is24bit = FALSE;
    } else {
      if (ndata->d[n].totalnum >= NNID_MAX_24) {
        /* sizeof yields size_t: cast so that it matches the %d conversion */
        jlog("Warning: ngram_read_bin_compat: num of %d-gram exceeds 24bit, now switch to %dbit index\n", n+1, (int)(sizeof(NNID) * 8));
        ndata->d[n].is24bit = FALSE;
      } else {
        ndata->d[n].is24bit = TRUE;
      }
    }
    ndata->d[n].nnid2ctid_upper = NULL;
    ndata->d[n].nnid2ctid_lower = NULL;
  }
  /* always do back-off compaction for 3-gram and up */
  /* mark 2-gram and up */
  ndata->d[0].ct_compaction = FALSE;
  for(n=1;n<ndata->n;n++) {
    ndata->d[n].ct_compaction = TRUE;
  }

  /* read wname: one block of NUL-terminated strings, len bytes in total */
  rdn(fp, &len, sizeof(int), 1);
  if (len < 0) {
    jlog("Error: ngram_read_bin_compat: wname error??\n");
    return FALSE;
  }
  w = mymalloc(len);
  rdn(fp, w, 1, len);
  if (len > 0 && w[len - 1] != '\0') {
    /* last name is not terminated: scanning it would run past the block */
    jlog("Error: ngram_read_bin_compat: wname error??\n");
    free(w);
    return FALSE;
  }
  /* assign... */
  ndata->wname = (char **)mymalloc(sizeof(char *) * ndata->max_word_num);
  p = w; i = 0;
  while (p < w + len) {
    if (i >= ndata->max_word_num) {
      /* more names in the block than entries allocated in wname[] */
      jlog("Error: ngram_read_bin_compat: wname error??\n");
      return FALSE;
    }
    ndata->wname[i++] = p;
    while(*p != '\0') p++;
    p++;
  }
  if (i != ndata->max_word_num) {
    jlog("Error: ngram_read_bin_compat: wname error??\n");
    return FALSE;
  }

  /* malloc 1-gram */
  t = &(ndata->d[0]);
  tt = &(ndata->d[1]);
  ttt = &(ndata->d[2]);

  t->bgn_upper = NULL;
  t->bgn_lower = NULL;
  t->bgn = NULL;
  t->num = NULL;
  t->bgnlistlen = 0;
  t->nnid2wid = NULL;
  t->nnid2ctid_upper = NULL;
  t->nnid2ctid_lower = NULL;

  t->context_num = t->totalnum;
  t->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->totalnum);
  ndata->bo_wt_1 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);
  t->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);
  tt->bgnlistlen = t->context_num;
  tt->bgn = (NNID *)mymalloc_big(sizeof(NNID), tt->bgnlistlen);
  tt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), tt->bgnlistlen);

  /* read 1-gram */
  jlog("stat: ngram_read_bin_compat: reading 1-gram\n");
  rdn(fp, t->prob, sizeof(LOGPROB), t->totalnum);
  rdn(fp, ndata->bo_wt_1, sizeof(LOGPROB), t->context_num);
  rdn(fp, t->bo_wt, sizeof(LOGPROB), t->context_num);
  rdn(fp, tt->bgn, sizeof(NNID), tt->bgnlistlen);
#ifdef WORDS_INT
  rdn_wordid(fp, tt->num, tt->bgnlistlen, need_conv);
#else
  rdn(fp, tt->num, sizeof(WORD_ID), tt->bgnlistlen);
#endif

#ifdef WORDS_INT
  {
    /* check if we are wrongly reading word_id=2byte bingram
       (if bingram version >= 4, this should not be happen because
        header correctly tells the word_id byte size.  This will 
        occur only if matches all the conditions below:
        - you run Julius with --enable-words-int,
        - you use old bingram of version <= 3, and
        - you use bingram file converted without --enable-words-int
     */
    WORD_ID wid;
    for(wid=0;wid<ndata->max_word_num;wid++) {
      if (ndata->d[1].num[wid] > ndata->max_word_num) {
        if (words_int_retry) {
          jlog("Error: ngram_read_bin_compat: retry failed, wrong bingram format\n");
          return FALSE;
        }
        jlog("Warning: ngram_read_bin_compat: incorrect data, may be a 2-byte v3 bingram, retry with conversion\n");
        /* drop everything allocated so far; wname[0] is the start of the
           name block, so freeing it releases all the name strings */
        free(ndata->wname[0]);
        free(ndata->wname);
        free(t->prob);
        free(ndata->bo_wt_1);
        free(t->bo_wt);
        free(tt->bgn);
        free(tt->num);
        myfrewind(fp);
        words_int_retry = TRUE;
        *retry_ret = 1;
        return FALSE;
      }
    }
  }
#endif

  /* malloc the rest */
  tt->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), tt->totalnum);
  tt->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->totalnum);
  ndata->p_2 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->totalnum);
  if (file_version == 4) {	/* context compaction and 24bit */
    tt->nnid2ctid_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), tt->totalnum);
    tt->nnid2ctid_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), tt->totalnum);
    tt->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->context_num);
    ttt->bgnlistlen = tt->context_num;
    ttt->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), ttt->bgnlistlen);
    ttt->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), ttt->bgnlistlen);
    ttt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->bgnlistlen);
  } else {
    tt->context_num = tt->totalnum;
    tt->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->context_num);
    ttt->bgnlistlen = tt->context_num;
    ttt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->bgnlistlen);
    if (ttt->is24bit) {
      ttt->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), ttt->bgnlistlen);
      ttt->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), ttt->bgnlistlen);
      n3_bgn = (NNID *)mymalloc_big(sizeof(NNID), ttt->bgnlistlen);
    } else {
      ttt->bgn = (NNID *)mymalloc_big(sizeof(NNID), ttt->bgnlistlen);
    }
  }
      
  ttt->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->totalnum);
  ttt->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ttt->totalnum);
  ttt->bo_wt = NULL;
  
  /* read 2-gram*/
  jlog("Stat: ngram_read_bin_compat: reading 2-gram\n");
#ifdef WORDS_INT
  rdn_wordid(fp, tt->nnid2wid, tt->totalnum, need_conv);
#else
  rdn(fp, tt->nnid2wid, sizeof(WORD_ID), tt->totalnum);
#endif
  rdn(fp, ndata->p_2, sizeof(LOGPROB), tt->totalnum);
  rdn(fp, tt->prob, sizeof(LOGPROB), tt->totalnum);
  if (file_version == 4) {
    rdn(fp, tt->nnid2ctid_upper, sizeof(NNID_UPPER), tt->totalnum);
    rdn(fp, tt->nnid2ctid_lower, sizeof(NNID_LOWER), tt->totalnum);
    rdn(fp, tt->bo_wt, sizeof(LOGPROB), tt->context_num);
    rdn(fp, ttt->bgn_upper, sizeof(NNID_UPPER), ttt->bgnlistlen);
    rdn(fp, ttt->bgn_lower, sizeof(NNID_LOWER), ttt->bgnlistlen);
#ifdef WORDS_INT
    rdn_wordid(fp, ttt->num, ttt->bgnlistlen, need_conv);
#else
    rdn(fp, ttt->num, sizeof(WORD_ID), ttt->bgnlistlen);
#endif
  } else {
    rdn(fp, tt->bo_wt, sizeof(LOGPROB), tt->context_num);
    if (ttt->is24bit) {
      /* the file stores a full-width index: read it into the temporary
         array and split each entry into lower 16 bits / upper part */
      rdn(fp, n3_bgn, sizeof(NNID), ttt->bgnlistlen);
      for(d=0;d<ttt->bgnlistlen;d++) {
        if (n3_bgn[d] == NNID_INVALID) {
          ttt->bgn_lower[d] = 0;
          ttt->bgn_upper[d] = NNID_INVALID_UPPER;
        } else {
          ntmp = n3_bgn[d] & 0xffff;
          ttt->bgn_lower[d] = ntmp;
          ntmp = n3_bgn[d] >> 16;
          ttt->bgn_upper[d] = ntmp;
        }
      }
    } else {
      rdn(fp, ttt->bgn, sizeof(NNID), ttt->bgnlistlen);
    }
#ifdef WORDS_INT
    rdn_wordid(fp, ttt->num, ttt->bgnlistlen, need_conv);
#else
    rdn(fp, ttt->num, sizeof(WORD_ID), ttt->bgnlistlen);
#endif
  }

  /* read 3-gram*/
  jlog("Stat: ngram_read_bin_compat: reading 3-gram\n");
#ifdef WORDS_INT
  rdn_wordid(fp, ttt->nnid2wid, ttt->totalnum, need_conv);
#else
  rdn(fp, ttt->nnid2wid, sizeof(WORD_ID), ttt->totalnum);
#endif
  rdn(fp, ttt->prob, sizeof(LOGPROB), ttt->totalnum);

  /* compact the 2-gram back-off and 3-gram links */
  if (file_version != 4) {
    if (ttt->is24bit) {
      free(n3_bgn);
      if (ngram_compact_context(ndata, 2) == FALSE) return FALSE;
    }
  }
  
  return TRUE;
}


/** 
 * Load a binary N-gram file into an N-gram data structure.
 *
 * The header is parsed first; version 5 files are then handed to
 * ngram_read_bin_v5() and older ones to ngram_read_bin_compat().  With
 * WORDS_INT, the compat reader may ask for one more pass over the file
 * with word ID conversion forced on, in which case the whole sequence
 * (including the header check) is repeated.  After a successful read the
 * word lookup tree is built and bi_prob_func_set() is called.
 * 
 * @param fp [in] file pointer
 * @param ndata [out] N-gram data to store the read data
 * 
 * @return TRUE on success, FALSE on failure.
 */
boolean
ngram_read_bin(FILE *fp, NGRAM_INFO *ndata)
{
  int retry_requested;

#ifdef WORDS_INT
  /* every load starts in non-retry mode */
  words_int_retry = FALSE;
#endif

  /* one pass per iteration; a second iteration only happens when the
     compat reader requests a retry */
  for (;;) {
    ndata->from_bin = TRUE;

    /* check initial header */
    if (check_header(fp) == FALSE) return FALSE;

#ifdef WORDS_INT
    /* in retry mode, force word_id conversion  */
    if (words_int_retry) need_conv = TRUE;
    if (need_conv) jlog("Stat: ngram_read_bin: word-id size conversion enabled\n");
#endif

    if (file_version > 4) {
      if (ngram_read_bin_v5(fp, ndata) == FALSE) return FALSE;
      break;
    }

    retry_requested = 0;
    if (ngram_read_bin_compat(fp, ndata, &retry_requested) != FALSE) break;

#ifdef WORDS_INT
    /* the compat reader has rewound the file: start over */
    if (retry_requested == 1) continue;
#endif
    return FALSE;
  }

  /* make word search tree for later lookup */
  jlog("Stat: ngram_read_bin: making entry name index\n");
  ngram_make_lookup_tree(ndata);

  bi_prob_func_set(ndata);

  return TRUE;
}

