/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "NewickFormat.h"

#include <core_api/Task.h>
#include <core_api/IOAdapter.h>
#include <gobjects/PhyTreeObject.h>
#include <util_text/TextUtils.h>

namespace GB2 {

/* TRANSLATOR GB2::IOAdapter */
/* TRANSLATOR GB2::NewickFormat */

// Registers the Newick document format: its display name, the file
// extensions it is associated with and the single object type it carries
// (phylogenetic trees).
NewickFormat::NewickFormat(QObject* p) : DocumentFormat(p, DocumentFormatFlags_W1)
{
    // user-visible (translatable) name of the format
    formatName = tr("Newick Standard");

    // file name extensions associated with the format
    fileExtensions << "nwk" << "newick";

    // the only kind of object stored in a Newick document
    supportedObjectTypes += GObjectTypes::PHYLOGENETIC_TREE;
}


// Size, in bytes, of the buffer that parseTrees() fills on each IOAdapter::readBlock() call.
#define BUFF_SIZE 1024

// Forward declaration: the parser is defined further down in this file and is used by loadDocument().
static QList<GObject*> parseTrees(IOAdapter* io, TaskStateInfo& si);

// Parses all trees available from 'io' and wraps them into a new Document.
//
// io   - adapter to read the Newick text from; also supplies the factory and
//        URL recorded in the resulting document
// ti   - receives parse errors and progress
// fs   - passed through unchanged to the Document constructor
// mode - not used by this format
//
// Returns the new document, or NULL when 'ti' reports errors after parsing;
// in that case every object produced so far is deleted.
Document* NewickFormat::loadDocument(IOAdapter* io, TaskStateInfo& ti, const QVariantMap& fs, DocumentLoadMode mode) {
    QList<GObject*> trees = parseTrees(io, ti);
    if (!ti.hasErrors()) {
        return new Document(this, io->getFactory(), io->getURL(), trees, fs);
    }

    // parsing failed: nothing takes over the partially built list, free it here
    qDeleteAll(trees);
    return NULL;
}

// Recursively writes the subtree that starts at 'node' in Newick notation.
//
// io   - adapter that receives the text
// node - node to serialize; it is not modified
//
// Output rules, in the order they are tried:
//  * a node with exactly one branch whose name is empty or "ROOT" is treated
//    as a pass-through: nothing is written for it and serialization continues
//    with the node at the other end of that branch;
//  * a node with more than one branch is written as "(child:dist,child:dist,...)",
//    visiting only branches whose 'node2' is not the node itself (the code
//    relies on this to skip the branch that leads back towards the root);
//  * any other node is written as its name with blanks replaced by '_'.
static void writeNode(IOAdapter* io, PhyNode* node) {
    int branches = node->branches.size();
    if (branches == 1 && (node->name=="" || node->name=="ROOT")) {
        PhyNode* next = node->branches[0]->node2;
        // Only descend when the branch really leads to another node. If it
        // points back at this node (an unnamed or "ROOT" leaf), recursing
        // would never terminate, so fall through and write it as a leaf.
        if (next != node) {
            writeNode(io, next);
            return;
        }
    }
    if (branches > 1) {
        io->writeBlock("(", 1);
        bool first = true;
        for (int i = 0; i < branches; ++i) {
            if (node->branches[i]->node2 != node) {
                if (first) {
                    first = false;
                } else {
                    io->writeBlock(",", 1);
                }
                writeNode(io, node->branches[i]->node2);
                io->writeBlock(":", 1);
                io->writeBlock(QByteArray::number(node->branches[i]->distance));
            }
        }
        io->writeBlock(")", 1);
    } else {
        // QString::replace() modifies the string it is called on, so work on
        // a copy: saving a tree must not rename the nodes of the in-memory model.
        QString label = node->name;
        label.replace(' ', '_');
        io->writeBlock(label.toAscii());
    }
}

// Writes every phylogenetic tree object of the document to 'io', one tree
// per line, each terminated by ';'. Objects of any other type are skipped.
//
// d  - document to store; must have been created for this format
// ts - not used by this implementation
// io - adapter that receives the Newick text
void NewickFormat::storeDocument( Document* d, TaskStateInfo& ts, IOAdapter* io) {
    assert(d->getDocumentFormat() == this);

    foreach(GObject* obj, d->getObjects()) {
        // qobject_cast yields NULL for objects that are not trees
        if (PhyTreeObject* treeObj = qobject_cast<PhyTreeObject*>(obj)) {
            writeNode(io, treeObj->getTree()->rootNode);
            io->writeBlock(";\n", 2);
        }
    }
}

// Heuristically decides whether 'rawData' could be Newick text.
//
// The data is rejected when:
//  * it contains characters classified as binary by TextUtils::BINARY;
//  * a ')' appears without a matching '(' before it;
//  * a ';' appears while brackets are still open;
//  * two runs of label characters (alphanumerics, '-' or '_') are separated
//    only by whitespace, i.e. something that looks like a label with blanks.
// Anything else is accepted. Brackets still open at the end of the data are
// NOT treated as an error.
//
// Returns true when none of the rejection rules fired.
bool NewickFormat::checkRawData(const QByteArray& rawData) const {
    const char* data = rawData.constData();
    int size = rawData.size();
    bool containsBinary = TextUtils::contains(TextUtils::BINARY, data, size);
    if (containsBinary) {
        return false;
    }
    int brackets = 0;
    // what the previous significant input was:
    //   letter             - a label character
    //   letter_then_whites - a label character followed by whitespace
    //   any                - anything else
    typedef enum { letter, letter_then_whites, any } Cases;
    Cases last = any;
    for (int i = 0; i < size; ++i) {
        // Use the byte as an unsigned value: a plain char may be negative for
        // bytes >= 0x80 and must not be used to index the TextUtils tables.
        // parseTrees() indexes the same tables through an unsigned char too.
        const unsigned char c = static_cast<unsigned char>(data[i]);
        switch (c) {
            case '(':
                ++brackets;
                break;
            case ')':
                if (brackets == 0) {
                    return false;
                }
                --brackets;
                break;
            case ';':
                if (brackets != 0) {
                    return false;
                }
                break;
            default:
                if (TextUtils::ALPHA_NUMS[c] || c == '-' || c == '_') {
                    if (last == letter_then_whites) {
                        return false;
                    }
                    last = letter;
                    continue; // next byte; keeps 'last' as set above
                }
                if (TextUtils::WHITES[c]) {
                    if (last == letter || last == letter_then_whites) {
                        last = letter_then_whites;
                        continue; // next byte; keeps 'last' as set above
                    }
                }
        }
        // brackets, ';', punctuation and whitespace not preceded by a label
        last = any;
    }
    return true;
}


/* TODO: handle the remaining rules of the Newick grammar in parseTrees():
 Unquoted labels may not contain blanks, parentheses, square brackets, single_quotes, colons, semicolons, or commas.
 Single quote characters in a quoted label are represented by two single quotes.
 Blanks or tabs may appear anywhere except within unquoted labels or branch_lengths.
 Newlines may appear anywhere except within labels or branch_lengths.
 Comments are enclosed in square brackets and may appear anywhere newlines are permitted.
*/
/**
 * Reads Newick text from 'io' and builds one PhyTreeObject per tree found.
 *
 * The input is consumed in blocks of BUFF_SIZE bytes. Whitespace is skipped
 * everywhere. Every other character that is not one of the structural
 * characters '(' ')' ':' ',' ';' is appended to a pending token. When a
 * structural character arrives, the pending token is first applied - as the
 * name of the node on top of the node stack (with '_' replaced by ' '), or as
 * the length of the branch on top of the branch stack - and then the
 * structural character itself is handled.
 *
 * A tree is completed by ';'. Input that ends without a final ';' is still
 * accepted as one more tree, provided all brackets have been closed.
 *
 * @param io  adapter to read from
 * @param si  receives the progress value and parse errors; its cancelFlag is
 *            checked after every block
 * @return    the trees built so far, named "Tree0", "Tree1", ... When an error
 *            has been set the list may be partial; loadDocument() deletes it
 *            in that case.
 */
static QList<GObject*> parseTrees(IOAdapter *io, TaskStateInfo& si) {
    QList<GObject*> objects;
    QByteArray block(BUFF_SIZE, '\0');
    int blockLen;
    bool done = true;   // false while non-whitespace input has been seen that is not yet closed by ';'
    int j = 0;          // number used in the name of the next tree

    // lookup table: true for the structural characters of the format
    QBitArray ops(256);
    ops['('] = ops[')'] = ops[':']  = ops[','] = ops[';'] = true;
    enum ReadState {RS_NAME, RS_WEIGHT};
    ReadState state = RS_NAME;  // how the pending token has to be interpreted
    QString lastStr;            // pending token; survives block boundaries
    PhyNode *rd = new PhyNode(); // root of the tree currently being built

    // nodeStack holds the path from the root to the node being read and
    // branchStack the branches along that path. Both are pushed and popped
    // together below, so branchStack always has one element less.
    QStack<PhyNode*> nodeStack;
    QStack<PhyBranch*>  branchStack;
    nodeStack.push(rd);
    while ((blockLen = io->readBlock(block.data(), BUFF_SIZE)) > 0) {
        for (int i = 0; i < blockLen; ++i) {
            unsigned char c = block[i];
            if (TextUtils::WHITES[(uchar)c]) {
                continue;
            }
            done = false;
            if (!ops[(uchar)c]) { //not ops -> cache
                lastStr.append(c);
                continue;
            }
            // use cached value
            if (state == RS_NAME) {
                nodeStack.top()->name = lastStr.replace('_', ' ');
            } else {
                assert(state == RS_WEIGHT);
                if (!branchStack.isEmpty()) { //ignore root node weight if present
                    if (nodeStack.size() < 2) {
                        si.setError(NewickFormat::tr("weight_unexpected_%1").arg(lastStr));
                        break; // stop at the first error instead of parsing on
                    }
                    bool ok = false;
                    branchStack.top()->distance = lastStr.toDouble(&ok);
                    if (!ok) {
                        si.setError(NewickFormat::tr("weight_parse_error_%1").arg(lastStr));
                        break;
                    }
                }
            }

            // advance in state
            if (c == '(') { //new child
                assert(!nodeStack.isEmpty());
                PhyNode* pn = new PhyNode();
                PhyBranch* bd = PhyNode::addBranch(nodeStack.top(),pn, 0);
                nodeStack.push(pn);
                branchStack.push(bd);
                state = RS_NAME;
            } else if (c == ':') { //weight start
                if (state == RS_WEIGHT) {
                    si.setError(NewickFormat::tr("unexpected_weight_start_%1").arg(lastStr));
                    break;
                }
                state = RS_WEIGHT;
            } else if ( c == ',') { //new sibling
                // A sibling needs a parent: a ',' outside any bracket is
                // malformed input. Report it instead of popping the last node
                // and then touching empty stacks.
                if (nodeStack.size() < 2 || branchStack.isEmpty()) {
                    si.setError(NewickFormat::tr("unexpected_comma_%1").arg(lastStr));
                    break;
                }
                nodeStack.pop();
                branchStack.pop();
                PhyNode* pn = new PhyNode();
                PhyBranch* bd = PhyNode::addBranch(nodeStack.top(), pn, 0);
                nodeStack.push(pn);
                branchStack.push(bd);
                state = RS_NAME;
            } else if ( c == ')' ) { //end of the branch, go up
                // a ')' without a matching '(' would remove the root itself
                if (nodeStack.size() < 2 || branchStack.isEmpty()) {
                    si.setError(NewickFormat::tr("unexpected_closing_bracket_%1").arg(lastStr));
                    break;
                }
                nodeStack.pop();
                branchStack.pop();
                state = RS_NAME;
            } else if (c == ';') {
                if (!branchStack.isEmpty() || nodeStack.size()!=1) {
                    si.setError(NewickFormat::tr("unexpected_eof"));
                    break;
                }
                PhyTree tree(new PhyTreeData());
                tree->rootNode = nodeStack.pop();
                objects.append(new PhyTreeObject(tree, QString("Tree%1").arg(j++)));
                // start a fresh root for the next tree in the stream
                nodeStack.push(rd = new PhyNode());
                // a root weight (":x;") leaves RS_WEIGHT behind; the next
                // tree must start by reading a name again
                state = RS_NAME;
                done = true;
            }
            lastStr.clear();
        }
        if (si.hasErrors() || si.cancelFlag) {
            // the tree under construction is abandoned
            delete rd;
            rd = NULL;
            break;
        }
        si.progress = io->getProgress();
    }
    if (!si.hasErrors() && !si.cancelFlag) {
        // end of input reached
        if (!branchStack.isEmpty() || nodeStack.size()!=1) {
            // some bracket is still open
            delete rd;
            si.setError(NewickFormat::tr("unexpected_eof"));
            return objects;
        }
        if (!done) {
            // input seen after the last ';' (or no ';' at all): accept it as a tree
            PhyNode *node = nodeStack.pop();
            PhyTree tree(new PhyTreeData());
            tree->rootNode = node;
            objects.append(new PhyTreeObject(tree, QString("Tree%1").arg(j)));
        } else {
            // the root prepared for a next tree was never used
            delete rd;
            if (objects.empty()) {
                si.setError(NewickFormat::tr("empty_file"));
            }
        }
    }
    return objects;
}

}//namespace
