// Copyright (C) 2003 Samy Bengio (bengio@idiap.ch)
//                
// This file is part of Torch 3.
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "EditDistance.h"
#include "XFile.h"

namespace Torch {

// Builds an evaluator with no sequences attached, unit cost for every
// edit operation, and all statistics cleared.
EditDistance::EditDistance()
{
  // Every edit operation costs 1 until setCosts() says otherwise.
  insert_cost = delete_cost = subst_cost = 1;

  // No sequences have been compared yet.
  obtained = desired = NULL;
  obt_size = des_size = 0;

  // Zero the accuracy and the operation counters.
  reset();
}

// Replaces the per-operation costs used by distance():
//   i_cost - cost of an insertion
//   d_cost - cost of a deletion
//   s_cost - cost of a substitution
// The values are stored as given; no validation is performed here.
void EditDistance::setCosts(int i_cost, int d_cost, int s_cost)
{
  subst_cost = s_cost;
  delete_cost = d_cost;
  insert_cost = i_cost;
}

// Clears the accumulated statistics: the insertion/deletion/substitution
// counters, the reference length n_seq, and the accuracy.
// The configured costs and the stored sequence pointers are left untouched.
void EditDistance::reset()
{
  n_insert = n_delete = n_subst = n_seq = 0;
  accuracy = 0;
}

void EditDistance::distance(int* obtained_, int obt_size_, int* desired_, int des_size_)
{
  obtained = obtained_;
  obt_size = obt_size_;
  desired = desired_;
  des_size = des_size_;

  n_insert = 0;
  n_delete = 0;
  n_subst = 0;
  int subst;
  Allocator allocator_;
  int **d = (int**)allocator_.alloc((des_size+1)*sizeof(int*));
  int **d_ins = (int**)allocator_.alloc((des_size+1)*sizeof(int*));
  int **d_del = (int**)allocator_.alloc((des_size+1)*sizeof(int*));
  int **d_sub = (int**)allocator_.alloc((des_size+1)*sizeof(int*));
  for (int i=0;i<des_size+1;i++) {
    d[i] = (int*)allocator_.alloc((obt_size+1)*sizeof(int));
    d_ins[i] = (int*)allocator_.alloc((obt_size+1)*sizeof(int));
    d_del[i] = (int*)allocator_.alloc((obt_size+1)*sizeof(int));
    d_sub[i] = (int*)allocator_.alloc((obt_size+1)*sizeof(int));
    for (int j=0;j<obt_size+1;j++) {
      d[i][j] = 0;
      d_ins[i][j] = 0;
      d_del[i][j] = 0;
      d_sub[i][j] = 0;
    }
  }
  for (int i=0;i<des_size;i++) {
    d[i+1][0] = d[i][0] + delete_cost;
    d_del[i+1][0] = d[i+1][0];
  }
  for (int i=0;i<obt_size;i++) {
    d[0][i+1] = d[0][i] + insert_cost;
    d_ins[0][i+1] = d[0][i+1];
  }
  for (int i=0;i<des_size;i++) {
    for (int j=0;j<obt_size;j++) {
      if (desired[i] == obtained[j]) {
        subst = 0;
      } else {
        subst = subst_cost;
      }
      int s_cost = d[i][j]+subst;
      int d_cost = d[i][j+1]+delete_cost;
      int i_cost = d[i+1][j]+insert_cost;
      if (s_cost <= d_cost && s_cost <= i_cost) {
        d[i+1][j+1] = s_cost;
        d_sub[i+1][j+1] = d_sub[i][j]+subst;
        d_del[i+1][j+1] = d_del[i][j];
        d_ins[i+1][j+1] = d_ins[i][j];
      } else if (d_cost <= i_cost && d_cost <= s_cost) {
        d[i+1][j+1] = d_cost;
        d_del[i+1][j+1] = d_del[i][j+1]+delete_cost;
        d_sub[i+1][j+1] = d_sub[i][j+1];
        d_ins[i+1][j+1] = d_ins[i][j+1];
      } else {
        d[i+1][j+1] = i_cost;
        d_ins[i+1][j+1] = d_ins[i+1][j]+insert_cost;
        d_del[i+1][j+1] = d_del[i+1][j];
        d_sub[i+1][j+1] = d_sub[i+1][j];
      }
    }
  }
  n_subst = d_sub[des_size][obt_size] / subst_cost;
  n_delete = d_del[des_size][obt_size] / delete_cost;
  n_insert = d_ins[des_size][obt_size] / insert_cost;
  n_seq = des_size;
  //dist = d[des_size][obt_size];
  accuracy = (n_seq - n_delete - n_subst - n_insert) * 100. / (real)n_seq;

}

// Accumulates the statistics of another measurement into this one:
// the operation counters and n_seq of d are added to ours, and the
// accuracy is then recomputed from the combined totals.
void EditDistance::add(EditDistance* d)
{
  n_seq += d->n_seq;
  n_subst += d->n_subst;
  n_delete += d->n_delete;
  n_insert += d->n_insert;

  // Reference symbols left once every counted error is subtracted.
  const int n_correct = n_seq - n_delete - n_subst - n_insert;
  accuracy = n_correct * 100. / (real)n_seq;
}

// Writes the raw counters to f on a single line -- total number of
// errors, insertions, deletions, substitutions and n_seq -- then
// flushes f.
void EditDistance::print(XFile *f)
{
  const int n_errors = n_insert+n_delete+n_subst;
  f->printf("total %d insert %d delete %d subst %d N %d\n",
    n_errors,n_insert,n_delete,n_subst,n_seq);
  f->flush();
}

// Writes the statistics to f as percentages of n_seq on a single line --
// accuracy, then insertion, deletion and substitution rates, followed by
// n_seq itself -- then flushes f.
void EditDistance::printRatio(XFile *f)
{
  const double insert_pct = n_insert*100./n_seq;
  const double delete_pct = n_delete*100./n_seq;
  const double subst_pct = n_subst*100./n_seq;
  f->printf("accuracy %5.2f insert %5.2f delete %5.2f subst %5.2f N %d\n",
    accuracy,insert_pct,delete_pct,subst_pct,n_seq);
  f->flush();
}

// Nothing to do on destruction: the body is intentionally empty.
EditDistance::~EditDistance()
{
}

}

