/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "uhmmcalibrate.h"

#include <hmmer2/funcs.h>

#include <time.h>
#include <assert.h>
#include <float.h>
#include <stdlib.h>

#include <QMutexLocker>
#include <core_api/Task.h>

namespace GB2 {
// Forward declaration of the serial calibration loop; the definition (with
// the full argument description) appears later in this file.
// It is used by UHMMCalibrate::calibrate(): the score histogram is returned
// through ret_hist and the highest score through ret_max, while cancelFlag
// and progress are the caller's cancellation flag and progress counter.
static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample,
                             float lenmean, float lensd, int fixedlen, 
                             struct histogram_s **ret_hist, float *ret_max, bool& cancelFlag, int& progress);

// Fills in the default calibration settings.
UHMMCalibrateSettings::UHMMCalibrateSettings() {
    // Execution parameters: a single thread, and a random seed taken from
    // the current wall-clock time.
    nThreads = 1;
    seed     = static_cast<int>(time(NULL));

    // Sampling parameters: how many random sequences are synthesized.
    nsample  = 5000;

    // Sequence length model: lengths are drawn from a Gaussian with this
    // mean and standard deviation; a fixedlen of 0 leaves that model in
    // effect (a nonzero value overrides it in the calibration loop).
    lenmean  = 325.0;
    lensd    = 200.0;
    fixedlen = 0;
}

// Calibrates an HMM in the calling thread.
//
// Runs the serial simulation loop with the parameters from `s`, then fits an
// extreme value distribution (EVD) to the resulting score histogram. On a
// successful fit the HMM gets the PLAN7_STATS flag and its mu/lambda are
// taken from the fitted histogram parameters; on failure si.error is set.
// If the task was cancelled during the simulation, no fit is attempted and
// the HMM statistics are left as they were.
//
// Args:  hmm - the HMM to calibrate
//        s   - calibration settings (seed, sample count, length model)
//        si  - task state: cancelFlag is polled, progress and error are written
void UHMMCalibrate::calibrate(plan7_s* hmm, const UHMMCalibrateSettings& s, TaskStateInfo& si) {
    struct histogram_s *scoreHist = NULL;   // allocated by main_loop_serial, released here
    float topScore = 0;                     // highest simulated score; not used further

    main_loop_serial(hmm, s.seed, s.nsample, s.lenmean, s.lensd, s.fixedlen,
                     &scoreHist, &topScore, si.cancelFlag, si.progress);

    // A cancelled run only needs its histogram released.
    if (si.cancelFlag) {
        FreeHistogram(scoreHist);
        return;
    }

    // Fit an EVD to the observed histogram.
    // TRUE left-censors the data, so only the right slope of the histogram
    // is fitted. 9999. is an arbitrarily high cutoff meaning that outliers
    // on the right are not trimmed.
    if (ExtremeValueFitHistogram(scoreHist, TRUE, 9999.)) {
        hmm->flags |= PLAN7_STATS;
        hmm->mu     = scoreHist->param[EVD_MU];
        hmm->lambda = scoreHist->param[EVD_LAMBDA];
    } else {
        si.error = "fit failed; num sequences may be set too small?\n";
    }

    FreeHistogram(scoreHist);
}


// Function: main_loop_serial()
// Date:     SRE, Tue Aug 18 16:18:28 1998 [St. Louis]
//
// Purpose:  Given an HMM and parameters for synthesizing random
//           sequences; return a histogram of scores.
//           (Serial version)  
//
// Args:     hmm      - an HMM to calibrate.
//           seed     - random number seed
//           nsample  - number of seqs to synthesize
//           lenmean  - mean length of random sequence
//           lensd    - std dev of random seq length
//           fixedlen - if nonzero, override lenmean, always this len
//           ret_hist - RETURN: the score histogram 
//           ret_max  - RETURN: highest score seen in simulation
//
// Returns:  (void)
//           hist is alloc'ed here, and must be free'd by caller.

static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, 
                            float lenmean, float lensd, int fixedlen,
                            struct histogram_s **ret_hist, float *ret_max, bool& cancelFlag, int& progress)
{
    struct histogram_s *hist;
    struct dpmatrix_s  *mx;
    float  randomseq[MAXABET];
    float  p1;
    float  max;
    char  *seq;
    unsigned char  *dsq;
    float  score;
    int    sqlen;
    int    idx;

    // Initialize.
    // We assume we've already set the alphabet (safe, because
    // HMM input sets the alphabet).
    
    sre_srandom(seed);

	//get HMMERTaskLocalData
	HMMERTaskLocalData *tls = getHMMERTaskLocalData();
    alphabet_s &al = tls->al;
	
    SetAlphabet(hmm->atype);

    P7Logoddsify(hmm, TRUE);
    P7DefaultNullModel(randomseq, &p1);
    hist = AllocHistogram(-200, 200, 100);
    mx = CreatePlan7Matrix(1, hmm->M, 25, 0);
    max = -FLT_MAX;

    progress = 0;
    int pStub;
    
    for (idx = 0; idx < nsample && !cancelFlag; idx++) {
        // choose length of random sequence
        if (fixedlen) {
            sqlen = fixedlen;
        } else {
            do sqlen = (int) Gaussrandom(lenmean, lensd); while (sqlen < 1);
        }
        // generate it
        seq = RandomSequence(al.Alphabet, randomseq, al.Alphabet_size, sqlen);
        dsq = DigitizeSequence(seq, sqlen);

        if (P7ViterbiSpaceOK(sqlen, hmm->M, mx)) {
            score = P7Viterbi(dsq, sqlen, hmm, mx, NULL);
        } else {
            score = P7SmallViterbi(dsq, sqlen, hmm, mx, NULL, pStub);
        }
    
        AddToHistogram(hist, score);
        max = qMax(score, max);

        progress = int(100*idx/float(nsample));

        free(dsq); 
        free(seq);
    }

    FreePlan7Matrix(mx);
    *ret_hist   = hist;
    *ret_max    = max;
}


//parallel calibrate

// Worker loop of the parallel calibration.
//
// Each iteration claims the next sample number from the shared pool (under
// wpool->lockInput), synthesizes a random sequence, scores it against
// wpool->hmm with Viterbi, and records the score in the shared histogram
// (under wpool->lockOutput). The loop ends when all wpool->nsample samples
// have been claimed or when si.cancelFlag is set.
//
// Args:  wpool - shared work pool: HMM, sampling parameters, sample counter,
//                result histogram, best score, progress pointer and the two
//                mutexes guarding input and output
//        si    - task state of this worker: cancelFlag is polled once per
//                iteration, progress is updated after every scored sample
void UHMMCalibrate::calibrateParallelWorker(WorkPool_s *wpool, TaskStateInfo& si) {

	HMMERTaskLocalData *tls = getHMMERTaskLocalData();
	struct alphabet_s  *al  = &tls->al;
	struct plan7_s     *hmm = wpool->hmm;
	struct dpmatrix_s  *mx  = CreatePlan7Matrix(1, hmm->M, 25, 0);

	// P7SmallViterbi() takes a progress counter by reference; its value is
	// not needed here. Initialized so that an indeterminate value is never
	// handed to the callee.
	int pStub = 0;

	// Stop claiming new samples once the task has been cancelled, as the
	// serial loop does.
	while (!si.cancelFlag)
	{
		char *seq      = NULL;
		int   len      = 0;
		int   sampleNo = 0;   // 1-based number of the sample claimed by this iteration
		{
			/* acquire a lock */
			QMutexLocker locker(&wpool->lockInput);
			/* claim the next sample; the counter is bumped before the
			   check, so it ends up above nsample once workers exit */
			wpool->nseq++;
			if (wpool->nseq > wpool->nsample) 
			{ 
				/* we're done; the locker releases the input lock on scope exit */
				break;
			}
			sampleNo = wpool->nseq;

			/* generate a sequence while still holding the input lock
			   (NOTE(review): presumably because the generator state is
			   shared between workers - confirm) */
			if (wpool->fixedlen) len = wpool->fixedlen;
			else do len = (int) Gaussrandom(wpool->lenmean, wpool->lensd); while (len < 1);
			seq = RandomSequence(al->Alphabet, wpool->randomseq, al->Alphabet_size, len);
		}

		/* digitize and score outside of any lock */
		unsigned char *dsq = DigitizeSequence(seq, len);

		float sc;
		if (P7ViterbiSpaceOK(len, hmm->M, mx))
			sc = P7Viterbi(dsq, len, hmm, mx, NULL);
		else
			sc = P7SmallViterbi(dsq, len, hmm, mx, NULL, pStub);
		free(dsq); 
		free(seq);

		{
			/* acquire lock on the output queue */
			QMutexLocker locker(&wpool->lockOutput);
			/* save output */
			AddToHistogram(wpool->hist, sc);
			if (sc > wpool->max_score) wpool->max_score = sc;
			/* Progress is derived from the sample number claimed above
			   rather than from wpool->nseq: that counter is modified under
			   lockInput only, so reading it here would be unsynchronized,
			   and it overshoots nsample once workers start to exit. */
			*(wpool->progress) = si.progress = int(100*sampleNo/float(wpool->nsample));
		}
	}
	FreePlan7Matrix(mx);
}


}//namespace
