/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "RepeatWorker.h"
#include "FindRepeatsDialog.h"

#include <workflow/IntegralBusModel.h>
#include <workflow/WorkflowEnv.h>
#include <workflow/WorkflowRegistry.h>
#include <workflow_support/CoreDataTypes.h>
#include <workflow_library/BioDatatypes.h>
#include <workflow_library/BioActorLibrary.h>
#include <workflow_support/DelegateEditors.h>
#include <workflow_support/CoreLibConstants.h>

#include <datatype/DNASequence.h>
#include <core_api/DNATranslation.h>
#include <core_api/DNAAlphabet.h>
#include <core_api/AppContext.h>
#include <core_api/Log.h>
#include <core_api/GObjectReference.h>
#include <util_tasks/FailTask.h>

#include <QtGui/QApplication>

/* TRANSLATOR GB2::LocalWorkflow::RepeatWorker */

namespace GB2 {
namespace LocalWorkflow {

// File-local logger created for the ULOG_CAT_WD category; the worker below uses
// it to report configuration errors and the number of repeats found.
static LogCategory log(ULOG_CAT_WD);

/******************************
 * RepeatWorkerFactory
 ******************************/
// Identifiers of the actor's configurable attributes. The same ids are used as
// keys when the attributes and their editing delegates are declared in
// RepeatWorkerFactory::init() and when the values are read back in
// RepeatPrompter::composeRichDoc() and RepeatWorker::tick().
// NOTE(review): the "a_".."i_" prefixes presumably fix the order in which the
// attributes are listed - confirm before renaming any of them.
static const QString NAME_ATTR("a_name");
static const QString LEN_ATTR("b_minlen");
static const QString IDENTITY_ATTR("c_identity");
static const QString MIN_DIST_ATTR("d_mindist");
static const QString MAX_DIST_ATTR("e_maxdist");
static const QString INVERT_ATTR("f_invert");
static const QString NESTED_ATTR("g_nested");
static const QString ALGO_ATTR("h_algo");
static const QString THREADS_ATTR("i_threads");

// Identifier under which the "Find repeats" actor prototype is described.
const QString RepeatWorkerFactory::ACTOR_ID("repeat.finder");

void RepeatWorkerFactory::init() {
    QList<PortDescriptor*> p; QList<Attribute*> a;

    {
        Descriptor id(CoreLibConstants::IN_PORT_ID, RepeatWorker::tr("Input sequences"), RepeatWorker::tr("A nucleotide sequence to search repeats in."));
        Descriptor od(CoreLibConstants::OUT_PORT_ID, RepeatWorker::tr("Repeat annotations"), RepeatWorker::tr("A set of annotations marking repeats found in the sequence."));
        p << new PortDescriptor(id, BioDataTypes::DNA_SEQUENCE_TYPE(), true /*input*/);
        p << new PortDescriptor(od, BioDataTypes::ANNOTATION_TABLE_TYPE(), false /*input*/, true /*multi*/);
    }
    {
        Descriptor nd(NAME_ATTR, RepeatWorker::tr("Annotate as"), RepeatWorker::tr("Name of the result annotations marking found repeats."));
        Descriptor idd(IDENTITY_ATTR, RepeatWorker::tr("Identity"), RepeatWorker::tr("Repeats identity."));
        Descriptor ld(LEN_ATTR, RepeatWorker::tr("Min length"), RepeatWorker::tr("Minimum length of repeats."));
        Descriptor mid(MIN_DIST_ATTR, RepeatWorker::tr("Min distance"), RepeatWorker::tr("Minimum distance between repeats."));
        Descriptor mad(MAX_DIST_ATTR, RepeatWorker::tr("Max distance"), RepeatWorker::tr("Maximum distance between repeats."));
        Descriptor ind(INVERT_ATTR, RepeatWorker::tr("Inverted"), RepeatWorker::tr("Search for inverted repeats."));
        Descriptor nsd(NESTED_ATTR, RepeatWorker::tr("Filter nested"), RepeatWorker::tr("Filter nested repeats."));
        Descriptor ald(ALGO_ATTR, RepeatWorker::tr("Algorithm"), RepeatWorker::tr("Control over variations of algorithm."));
        Descriptor thd(THREADS_ATTR, RepeatWorker::tr("Parallel threads"), RepeatWorker::tr("Number of parallel threads used for the task."));

        FindRepeatsTaskSettings cfg = FindRepeatsDialog::defaultSettings();
        a << new Attribute(nd, CoreDataTypes::STRING_TYPE(), true, "repeat_unit");
        a << new Attribute(ld, CoreDataTypes::NUM_TYPE(), false, cfg.minLen);
        a << new Attribute(idd, CoreDataTypes::NUM_TYPE(), false, cfg.getIdentity());
        a << new Attribute(mid, CoreDataTypes::NUM_TYPE(), false, cfg.minDist);
        a << new Attribute(mad, CoreDataTypes::NUM_TYPE(), false, cfg.maxDist);
        a << new Attribute(ind, CoreDataTypes::BOOL_TYPE(), false, cfg.inverted);
        a << new Attribute(nsd, CoreDataTypes::BOOL_TYPE(), false, cfg.filterNested);
        a << new Attribute(ald, CoreDataTypes::NUM_TYPE(), false, cfg.algo);
        a << new Attribute(thd, CoreDataTypes::NUM_TYPE(), false, cfg.nThreads);
    }

    Descriptor desc(ACTOR_ID, RepeatWorker::tr("Find repeats"), 
        RepeatWorker::tr("Finds repeats in each supplied sequence, stores found regions as annotations.")
        );
    ActorPrototype* proto = new BusActorPrototype(desc, p, a);
    QMap<QString, PropertyDelegate*> delegates;    
    {
        QVariantMap m; m["minimum"] = 0; m["maximum"] = INT_MAX; m["suffix"] = RepeatWorker::tr(" bp");
        delegates[MIN_DIST_ATTR] = new SpinBoxDelegate(m);
        m["specialValueText"] = RepeatWorker::tr("Any");
        delegates[MAX_DIST_ATTR] = new SpinBoxDelegate(m);
        m["minimum"] = 2;
        delegates[LEN_ATTR] = new SpinBoxDelegate(m);
    }
    {
        QVariantMap m; m["minimum"] = 50; m["maximum"] = 100; m["suffix"] = "%";
        delegates[IDENTITY_ATTR] = new SpinBoxDelegate(m);
    }
    {
        QVariantMap m; m["specialValueText"] = "Auto";
        delegates[THREADS_ATTR] = new SpinBoxDelegate(m);
    }
    {
        QVariantMap m; 
        m["Auto"] = RFAlgorithm_Auto;
        m["Diagonals"] = RFAlgorithm_Diagonal;
        m["Suffix index"] = RFAlgorithm_Suffix;
        delegates[ALGO_ATTR] = new ComboBoxDelegate(m);
    }

    proto->setPrompter(new RepeatPrompter());
    proto->setEditor(new DelegateEditor(delegates));
    proto->setIconPath(":repeat_finder/images/repeats.png");
    WorkflowEnv::getProtoRegistry()->registerProto(BioActorLibrary::CATEGORY_BASIC(), proto);

    DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
    localDomain->registerEntry(new RepeatWorkerFactory());
}

/******************************
 * RepeatPrompter
 ******************************/
// Composes the rich-text summary shown for the "Find repeats" actor: where the
// sequences come from, whether direct or inverted repeats are searched, the
// identity and minimum length thresholds, and the name of the result annotations.
//
// Returns the translated, HTML-formatted description.
QString RepeatPrompter::composeRichDoc() {
    BusPort* input = qobject_cast<BusPort*>(target->getPort(CoreLibConstants::IN_PORT_ID));
    // qobject_cast returns NULL when the port is absent or is not a BusPort;
    // in that case the prompt is simply composed without a producer name.
    Actor* producer = input ? input->getProducer(CoreLibConstants::IN_PORT_ID) : NULL;
    QString producerName = producer ? tr(" from %1").arg(producer->getLabel()) : "";

    // TODO extend ?
    QString resultName = getRequiredParam(NAME_ATTR);
    QString inverted = getParameter(INVERT_ATTR).toBool() ? tr("inverted") : tr("direct");

    // All place markers are substituted in a single pass (multi-argument arg()).
    // Chained .arg() calls would re-scan text that was already inserted, so a
    // producer label or annotation name containing e.g. "%2" would swallow a
    // later argument and garble the prompt.
    QString doc = tr("For each sequence<u>%1</u>, find <u>%2</u> repeats."
        "<br>Detect <u>%3% identical</u> repeats <u>not shorter than %4 bps</u>."
        "<br>Output the list of found regions annotated as <u>%5</u>.")
        .arg(producerName, //sequence from Read Fasta 1
             inverted,
             QString::number(getParameter(IDENTITY_ATTR).toInt()),
             QString::number(getParameter(LEN_ATTR).toInt()),
             resultName);

    return doc;
}

/******************************
 * RepeatWorker
 ******************************/
// Creates a worker for the given actor. Both port pointers start out as NULL
// and are looked up later in init().
RepeatWorker::RepeatWorker(Actor* a)
    : BaseWorker(a),
      input(NULL),
      output(NULL)
{
}

// Looks up the worker's two ports by their standard ids and caches them for
// use in isReady(), tick(), sl_taskFinished() and isDone().
void RepeatWorker::init() {
    output = ports.value(CoreLibConstants::OUT_PORT_ID);
    input = ports.value(CoreLibConstants::IN_PORT_ID);
}

// Reports whether a message is waiting on the input port.
// Returns false when the input port has not been resolved.
bool RepeatWorker::isReady() {
    if (!input) {
        return false;
    }
    return input->hasMessage();
}

// Processes one input message: refreshes the search settings from the actor's
// parameters, validates them, extracts the sequence and creates the task that
// searches for repeats and turns them into annotations.
//
// Returns:
//  - a FindRepeatsToAnnotationsTask whose state changes are reported to
//    sl_taskFinished() when the settings and the sequence are acceptable;
//  - a FailTask when identity or minimal distance is out of range, or when the
//    sequence is unusable and failFast is set;
//  - NULL when the sequence is unusable and failFast is not set (the error is
//    only logged).
// NOTE(review): on the NULL path nothing marks the output as ended even if
// this was the last input message - confirm the scheduler copes with that.
Task* RepeatWorker::tick() {
    Message inputMessage = getMessageAndSetupScriptValues(input);

    // Parameters are re-read on every tick, after the script values were set up.
    cfg.algo = RFAlgorithm(actor->getParameter(ALGO_ATTR)->getAttributeValue<int>());
    cfg.minLen = actor->getParameter(LEN_ATTR)->getAttributeValue<int>();
    cfg.minDist = actor->getParameter(MIN_DIST_ATTR)->getAttributeValue<int>();
    cfg.maxDist = actor->getParameter(MAX_DIST_ATTR)->getAttributeValue<int>();
    int identity = actor->getParameter(IDENTITY_ATTR)->getAttributeValue<int>();
    cfg.nThreads = actor->getParameter(THREADS_ATTR)->getAttributeValue<int>();
    cfg.inverted = actor->getParameter(INVERT_ATTR)->getAttributeValue<bool>();
    cfg.filterNested = actor->getParameter(NESTED_ATTR)->getAttributeValue<bool>();
    resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>();

    // An empty annotation name is not fatal: fall back to the default one.
    if (resultName.isEmpty()) {
        resultName = "repeat_unit";
        log.error(tr("result name is empty, default name used"));
    }

    // Validate identity before handing it to the settings object, so an
    // out-of-range percentage is never applied.
    if (identity > 100 || identity < 0) {
        const QString err = tr("Incorrect value: identity value must be between 0 and 100");
        log.error(err);
        return new FailTask(err);
    }
    cfg.setIdentity(identity);

    // Only negative distances are rejected; zero is accepted.
    if (cfg.minDist < 0) {
        const QString err = tr("Incorrect value: minimal distance must be greater then zero");
        log.error(err);
        return new FailTask(err);
    }

    DNASequence seq = inputMessage.getData().value<DNASequence>();

    // The alphabet pointer is dereferenced here, so a missing alphabet is
    // rejected the same way as a non-nucleic one instead of crashing.
    if (!seq.alphabet || !seq.alphabet->isNucleic()) {
        QString err = tr("Sequence alphabet is not nucleic!");
        if (failFast) {
            return new FailTask(err);
        } else {
            log.error(err);
            return NULL;
        }
    }

    Task* t = new FindRepeatsToAnnotationsTask(cfg, seq, resultName, QString(), GObjectReference());
    connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
    return t;
}

// Slot connected in tick() to the repeat task's si_stateChanged() signal.
// It ignores every state change except a finish without errors; then it puts
// the task's annotations on the output port, marks the output as ended if the
// input has ended, and logs the number of repeats found.
void RepeatWorker::sl_taskFinished() {
    FindRepeatsToAnnotationsTask* t = qobject_cast<FindRepeatsToAnnotationsTask*>(sender());
    // qobject_cast returns NULL when the slot is not invoked by a signal of a
    // FindRepeatsToAnnotationsTask (e.g. a direct call); nothing to do then.
    if (!t) {
        return;
    }
    // The signal fires on every state change; only an error-free finish matters.
    if (t->getState() != Task::State_Finished || t->hasErrors()) {
        return;
    }
    if (output) {
        const QList<SharedAnnotationData>& res = t->importAnnotations();
        QVariant v = qVariantFromValue<QList<SharedAnnotationData> >(res);
        output->put(Message(BioDataTypes::ANNOTATION_TABLE_TYPE(), v));
        // Guard the input pointer like isReady()/isDone() do before using it.
        if (input && input->isEnded()) {
            output->setEnded();
        }
        log.info(tr("Found %1 repeats").arg(res.size()));
    }
}

// Reports whether the worker has nothing more to process: true when the input
// port is unresolved or when it has ended.
bool RepeatWorker::isDone() {
    if (input) {
        return input->isEnded();
    }
    return true;
}

// Intentionally empty: this worker performs no cleanup work of its own.
void RepeatWorker::cleanup() {
}

} //namespace LocalWorkflow
} //namespace GB2
