/* Copyright 2001 Matt Flax <flatmax@ieee.org>
   This file is part of MFFM Time Scale Modification for Audio.

   MFFM Time Scale Modification for Audio is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   
   MFFM Time Scale Modification for Audio is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You have received a copy of the GNU General Public License
   along with MFFM Time Scale Modification for Audio
 */
#ifndef WSOLA_H_
#define WSOLA_H_

//This function is for scientific rounding
#ifndef roundD
//#define roundD(a) ((a-(int)a>=0.5) ? ceil(a) : floor(a))
#define roundD(a) rint(a)
#endif

#ifndef MAXDOUBLE
#define MAXDOUBLE 1.79769e+308
#endif

#include <string.h>
#include <iostream>
using namespace std;
#include <values.h>
#include "hanning.H"

//From the MFFM_TimeCode library available at sourceforge.net
#include <mffm/timeCode.H>

//#define W_DEBUG

#ifndef ATYPE
#define ATYPE short int
#endif
#ifndef SAMPLE_FREQUENCY
#define SAMPLE_FREQUENCY 44100
#endif

//The hanning window duration in s
#define HANNING_DURATION 0.1
//The proportional hanning overlap
#define HANNING_OVERLAP 0.5
//The sample length of the hanning window
#define HANNING_LENGTH(sampFreq) (int)roundD(HANNING_DURATION*(double)sampFreq)

//The pitch (Hz) which we don't want to exceed the period of during matching
//(the hanning length is divided by this value to give the maximum search offset, deltaMax)
#define DELTA_DIVISOR 6.0

/* reference :
   An Overlap Add Technique Based on Waveform Similarity (WSOLA)
   For High Quality Time-Scale Modification of Speech
   Werner Verhelst and Marc Roelands
   IEEE publication
*/

#define TIMECODETYPE_W TimeCode<MasterCounter<field, 1>, ATYPE>

//As we are using Hanning windows, extremes of the windows aren't important
#define NO_SEARCH_PERCENTAGE 10.0/100.0 //Don't search this much of a frame

class WSOLA : public Hanning {
  double locFBF; //used in frame by frame mode
  TIMECODETYPE_W sFrame, dFrame, nextSFrame;//src and dest and next src frames
  ATYPE *output; // The timescale modified audio
  int count;

  int hanningLength, deltaMax, lastNextSFrame, lastDFrame, sampleFrequency;

  void processFrame(ATYPE *input){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrame : enter"<<endl;
#endif
    int loc=sFrame.getCount();
    //Load the src frame window we are matching to
    ATYPE* tempSFW=sFrame.window->getDataPtr();
    for (int i=0;i<sFrame.window->getCount();i++)
      tempSFW[i]=(int)roundD((double)input[loc+i]*wnd[i]);

    //Search through for the closest match
    int cnt=hanningLength, initialDFrameLoc=dFrame.getCount(), initialNextSFrameLoc=nextSFrame.getCount();
    int delta=0;
    double difference, minDifference=MAXDOUBLE;
    int bestLocation=0;
    ATYPE *tempNSFW;
    if (initialDFrameLoc!=0) //We aren't at the beginning so process as normal
      while (delta<deltaMax){
	tempNSFW=nextSFrame.window->getDataPtr();
	loc=nextSFrame.getCount();
	for (int i=0;i<nextSFrame.window->getCount();i++)
	  tempNSFW[i]=(ATYPE)roundD((double)input[loc+i]*wnd[i]);
	if ((difference=findSimilarity(sFrame.window->getDataPtr(),nextSFrame.window->getDataPtr(), cnt))<minDifference){
	  minDifference=difference;
	  bestLocation=delta;
	  //cout<<"minDifference "<<minDifference<<" bestLocation "<<bestLocation<<endl;
	}
	nextSFrame=nextSFrame.getCount()+1;
	delta++;
      }

    //Copy the closest match over to the output audio array
    //cout<<"bestLocation: "<<bestLocation<<endl;
    nextSFrame=initialNextSFrameLoc+bestLocation;
    tempNSFW=nextSFrame.window->getDataPtr();
    loc=nextSFrame.getCount();
    int i;
    for (i=0;i<nextSFrame.window->getCount();i++)
      tempNSFW[i]=(ATYPE)roundD((double)input[loc+i]*wnd[i]);
    int maxBoundary;
    //    cout<<"difference= "<<(nextSFrame.getEnd()-nextSFrame.getCount())<<endl;
    if (cnt>(dFrame.getEnd()-dFrame.getCount()))
      maxBoundary=(dFrame.getEnd()-dFrame.getCount());
    else
      maxBoundary=cnt;
    //cout<<"i "<<i<<" maxBoundary "<<maxBoundary<<" initialDFrameLoc "<< initialDFrameLoc<<endl;
    tempNSFW=nextSFrame.window->getDataPtr();
    for (i=0;i<maxBoundary;i++)
      output[i+initialDFrameLoc]+=tempNSFW[i];
    //cout<<"i "<<i<<endl;
#ifdef W_DEBUG
    cout<<"WSOLA::processFrame : exit"<<endl;
#endif
  }

  void processLastFrame(ATYPE *input){
#ifdef W_DEBUG
    cout<<"WSOLA::processLastFrame : enter"<<endl;
#endif
    //Find ammount left to use ...
    //int sLeft=sFrame.getEnd()-sFrame.getCount();
    int nSLeft=nextSFrame.getEnd()-nextSFrame.getCount();
    int dLeft=dFrame.getEnd()-dFrame.getCount();
    int lastPoint;
    if (dLeft!=0){
      //cout<<" sLeft "<<sLeft<<" nSLeft "<<nSLeft<<" dLeft "<<dLeft<<endl;
      
      //Reload the second half without hanning
      int halfCnt=(int)roundD((double)hanningLength/2.0);
      //    cout<<"hanningLength "<<hanningLength<<" halfCnt "<<halfCnt<<endl;
      for (int i=0;i<halfCnt;i++)
	output[lastDFrame+halfCnt+i]=input[lastNextSFrame+halfCnt+i];
      lastPoint=lastDFrame+halfCnt+halfCnt-1;
      //cout<<dFrame<<endl;
      //lastPoint=lastDFrame+halfCnt;
      //cout<<"lastDFrame "<<lastDFrame<<" lastPoint "<<lastPoint<<endl;

      //int newDLeft=dFrame.getEnd()-lastPoint;
      int newDLeft=dFrame.getEnd()-lastDFrame-halfCnt;
      //cout<<"newDLeft "<<newDLeft<<endl;

      if (nSLeft==dLeft){//Neither compressing nor stretching time
	for (int i=0;i<dFrame.getEnd()-lastPoint;i++)
	  output[lastPoint+i]=input[lastPoint+i];
      } else if (nSLeft>dLeft){//Compressing time
	//cout<<"here "<<(int)roundD((double)newDLeft/2.0)<<endl;
	int tempCnt=(int)roundD((double)newDLeft/2.0);
	//Hanning tempHanning(tempCnt);
	
	reset(tempCnt);
	//halfCnt=(int)roundD((double)tempHanning.getCount()/2.0);

	//Apply the hanning to the output
	int startLoc=lastPoint-halfCnt;
	nextSFrame=nextSFrame.getEnd()-newDLeft-halfCnt;
	int loc1=nextSFrame.getCount();
	for (int i=startLoc;i<=lastPoint;i++, loc1++)
	  output[i]=(ATYPE)roundD((double)output[i]*wnd[lastPoint-i]+
				  wnd[i-startLoc]*(double)input[loc1]);
	//output[i]=(ATYPE)roundD((double)output[i]*tempHanning[lastPoint-i]+
	//tempHanning[i-startLoc]*(double)input[loc1]);
	nextSFrame=nextSFrame.getEnd()-newDLeft;
	dFrame=dFrame.getEnd()-newDLeft;
	for (int i=0;i<newDLeft;i++,nextSFrame+=1,dFrame+=1)
	  output[dFrame.getCount()]=input[nextSFrame.getCount()];
	roundOff();
      }else if (nSLeft<dLeft){//Expanding time
	//We can simply copy the rest in ...
	nextSFrame=lastNextSFrame+hanningLength;
	newDLeft=nextSFrame.getEnd()-nextSFrame.getCount();
	int loc1=nextSFrame.getCount();
	dFrame=lastDFrame+hanningLength-1;
	int loc2=dFrame.getCount();
	for (int i=0;i<newDLeft;i++, loc2++, loc1++)
	  output[loc2]=input[loc1];
      }
    }
#ifdef W_DEBUG
    cout<<"WSOLA::processLastFrame : exit"<<endl;
#endif
  }

  // This function half hannings 1 ms at the end of the signal to ensure no clicking
  void roundOff(void){
    int wndSize=(int)roundD((double)sampleFrequency*0.01);
    if (wndSize%2)
      wndSize++;
    //Hanning smoothEnd(wndSize);
    reset(wndSize);
    int halfCnt=Hanning::getCount()/2;
    int loc=dFrame.getEnd()-halfCnt+1;
    int index=halfCnt;
    for (int i=loc;i<loc+halfCnt;i++)
      output[i]=(int)roundD((double)output[i]*wnd[index++]);
  }

  inline double findSimilarity(ATYPE *src, ATYPE *nextSrc, int cnt){
    register double sim=0;
    int lower=(int)roundD((float)cnt*NO_SEARCH_PERCENTAGE); //No need to look at extremes - 'cause of hanning
    int upper=cnt-lower;
    for (int i=lower;i<upper;i++)
      //for (int i=0;i<cnt;i++)
      sim+=(double)abs(src[i]-nextSrc[i]);
    //cout<<sim<<endl;
    //    return sim*hanningLength/cnt;
    return sim;
  }
public:
  ///Constructor
  WSOLA(int hl=HANNING_LENGTH(SAMPLE_FREQUENCY), int sf=SAMPLE_FREQUENCY) : Hanning(hl){
#ifdef W_DEBUG
    cout<<"WSOLA::WSOLA : enter"<<endl;
#endif
    hanningLength=hl;
    sampleFrequency=sf;
    deltaMax=(int)roundD((double)hanningLength/DELTA_DIVISOR);
    output=NULL;
    count=0;
#ifdef W_DEBUG
    cout<<"hanningLenght = "<<hanningLength<<" deltaMax = "<<deltaMax<<endl;
    cout<<"WSOLA::WSOLA : exit"<<endl;
#endif
  }

  ///Destructor
  ~WSOLA(void){
#ifdef W_DEBUG
    cout<<"WSOLA::~WSOLA : enter"<<endl;
#endif
    if (output) delete [] output;
#ifdef W_DEBUG
    cout<<"WSOLA::~WSOLA : exit"<<endl;
#endif
  }

  /**This is the actual processing loop.
    It take the pointer to the original audio (input), the sample count (cnt),
    it also takes the tau to scale by (tau)
   */
  void process(ATYPE *input, int cnt, double tau){
#ifdef W_DEBUG
    cout<<"WSOLA::process : enter"<<endl;
#endif

    //Malloc the output audio array
#ifdef W_DEBUG
    cout<<"WSOLA::process : processing : "<<cnt<<" samples"<<endl;
    cout<<"WSOLA::process : initting output array to : "<<(int)roundD(tau*(double)cnt)<<endl;
#endif
    int tempCnt=(int)roundD(tau*(double)cnt);
    if (tempCnt<=1)
      tempCnt=1;

    if (count<tempCnt){
      count=tempCnt;
      if (output) delete [] output;
      if (!(output=new ATYPE[tempCnt])){
	cerr<<"WSOLA::process : output audio array malloc failure"<<endl;
	exit(-1);
      }
    }
    bzero(output, tempCnt*sizeof(ATYPE));

    //Set up the time codes
    sFrame.init(0,cnt);
    nextSFrame.init(0,cnt);
    dFrame.init(0,(int)roundD((double)cnt*tau));
    //Ensure the array frame and window sizes are correct
    (*sFrame.window)=hanningLength;
    (*nextSFrame.window)=hanningLength;
    (*dFrame.window)=hanningLength;
    sFrame.window->setFrameSize(sizeof(ATYPE));
    nextSFrame.window->setFrameSize(sizeof(ATYPE));
    dFrame.window->setFrameSize(sizeof(ATYPE));

    //Process input frame by input frame ....
    double loc=0;
    sFrame=0;
    nextSFrame=0;
    dFrame=0;

    //Set up the first half output frame ...
    int halfWndCnt=(int)roundD((double)sFrame.window->getCount()/2.0);
    for (int i=0;i<halfWndCnt;i++)
      output[i]+=(ATYPE)roundD((double)input[i]*wnd[i+halfWndCnt]);

    int wndCnt=hanningLength;
    while (((nextSFrame.getCount()+wndCnt)<cnt) && ((sFrame.getCount()+wndCnt)<cnt) &&
	   ((dFrame.getCount()+(int)roundD((double)hanningLength/2.0))<dFrame.getEnd())){//Loop untill we've hit the end of the frame
      //Set the source frame to its beginning location
      processFrame(input);

      cout<<"Start: "<<dFrame.getBeginning()<<"\tCurrent: "<<dFrame.getCount()<<"\t\tStop: "<<dFrame.getEnd()<<'\r';
      cout<<'\n';

      sFrame=(int)roundD(((double)nextSFrame.getCount()+(double)hanningLength/2.0));
      loc+=(double)hanningLength/2.0;
      lastNextSFrame=nextSFrame.getCount();
      nextSFrame=(int)roundD(loc/tau);
      lastDFrame=dFrame.getCount();
      dFrame=(int)roundD(loc);
    }
    cout<<'\n';

    processLastFrame(input);
#ifdef W_DEBUG
    cout<<"WSOLA::process : exit"<<endl;
#endif
  }

  /**This is the actual processing loop.
    It take the pointer to the original audio (input), the sample count (cnt),
    it also takes the tau to scale by (tau)
   */
  void processFrameByFrameInit(ATYPE *input, int cnt, double tau){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrameByFrameInit : enter"<<endl;
#endif

    //Malloc the output audio array
#ifdef W_DEBUG
    cout<<"WSOLA::process : processing : "<<cnt<<" samples"<<endl;
    cout<<"WSOLA::process : initting output array to : "<<(int)roundD(tau*(double)cnt)<<endl;
#endif
    int tempCnt=(int)ceil(tau*(double)cnt);
    if (tempCnt<=1)
      tempCnt=1;

    if (count<tempCnt){
      count=tempCnt;
      if (output) delete [] output;
      if (!(output=new ATYPE[tempCnt])){
	cerr<<"WSOLA::process : output audio array malloc failure"<<endl;
	exit(-1);
      }
    }
    bzero(output, tempCnt*sizeof(ATYPE));

    //Set up the time codes
    sFrame.init(0,cnt);
    nextSFrame.init(0,cnt);
    dFrame.init(0,(int)roundD((double)cnt*tau));
    //Ensure the array frame and window sizes are correct
    (*sFrame.window)=hanningLength;
    (*nextSFrame.window)=hanningLength;
    (*dFrame.window)=hanningLength;
    sFrame.window->setFrameSize(sizeof(ATYPE));
    nextSFrame.window->setFrameSize(sizeof(ATYPE));
    dFrame.window->setFrameSize(sizeof(ATYPE));

    //Process input frame by input frame ....
    sFrame=0;
    nextSFrame=0;
    dFrame=0;

    //Set up the first half output frame ...
    int halfWndCnt=(int)roundD((double)sFrame.window->getCount()/2.0);
    for (int i=0;i<halfWndCnt;i++)
      output[i]+=(ATYPE)roundD((double)input[i]*wnd[i+halfWndCnt]);

    locFBF=0; //set the initial location
  }

  int processFrameByFrame(ATYPE *input, int cnt, double tau){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrameByFrame : enter"<<endl;
#endif
   cout<<"channels*cnt "<<cnt<<'\n';
    int wndCnt=hanningLength;
    //cout<<"tau "<<tau<<endl;
    //Set the source frame to its beginning location
    processFrame(input);
    
    cout<<"Start: "<<dFrame.getBeginning()<<"\tCurrent: "<<dFrame.getCount()<<"\t\tStop: "<<dFrame.getEnd()<<'\r';
    
    sFrame=(int)roundD(((double)nextSFrame.getCount()+(double)hanningLength/2.0));
    lastNextSFrame=nextSFrame.getCount();
    locFBF+=(double)hanningLength/2.0;
    nextSFrame=(int)roundD(locFBF/tau);
    //locFBF+=(double)hanningLength/2.0;
    //nextSFrame=(int)roundD(locFBF/tau);
    ////    nextSFrame=(int)roundD(lastNextSFrame/tau);

    cout<<"\ntau: count: "<<tau<<'\n';
    cout<<"\nlocFBF: count: "<<locFBF<<'\n';
    cout<<"\nsFrame: count: "<<sFrame.getCount()<<endl;
    cout<<"\nnextSFrame: count: "<<nextSFrame.getCount()<<endl;
    cout<<"\ncnt: count: "<<cnt<<endl;

    lastDFrame=dFrame.getCount();
    dFrame=(int)roundD(locFBF);

    return (((nextSFrame.getCount()+wndCnt)<cnt) && ((sFrame.getCount()+wndCnt)<cnt));
    //    return (((nextSFrame.getCount()+wndCnt)<cnt) && ((sFrame.getCount()+wndCnt)<cnt) && ((dFrame.getCount()+(int)roundD((double)hanningLength/2.0))<dFrame.getEnd()));
    }

  void processFrameByFrameComplete(ATYPE *input){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrameByFrame : enter"<<endl;
#endif
    cout<<'\n';

    //processLastFrame(input);
#ifdef W_DEBUG
    cout<<"WSOLA::process : exit"<<endl;
#endif
  }


  ///This is the size of the modified (output) data
  int getCount(void){return dFrame.getEnd();}

  ///Returns the current sample location
//  int getLoc(void){return sFrame.getCount();};
//  int getLoc(void){return nextSFrame.getCount();};
  int getLoc(void){return dFrame.getCount();};

  ///This is for indexing the modified (output) data
  ATYPE& operator[](int i){
    return output[i];
  }
};
#endif //WSOLA_H_
