///###////////////////////////////////////////////////////////////////////////
//
// Burton Computer Corporation
// http://www.burton-computer.com
// http://www.cooldevtools.com
// $Id: PhrasingTokenizer.cc 65 2004-11-04 06:13:12Z brian $
//
// Copyright (C) 2000 Burton Computer Corporation
// ALL RIGHTS RESERVED
//
// This program is open source software; you can redistribute it
// and/or modify it under the terms of the Q Public License (QPL)
// version 1.0. Use of this software in whole or in part, including
// linking it (modified or unmodified) into other programs is
// subject to the terms of the QPL.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// Q Public License for more details.
//
// You should have received a copy of the Q Public License
// along with this program; see the file LICENSE.txt.  If not, visit
// the Burton Computer Corporation or CoolDevTools web site
// QPL pages at:
//
//    http://www.burton-computer.com/qpl.html
//    http://www.cooldevtools.com/qpl.html
//

#include "AbstractTokenReceiver.h"
#include "PhrasingTokenizer.h"

// Text placed between adjacent words when they are joined into a phrase.
static const string SEPARATOR = " ";

// Builds a tokenizer that combines the words produced by another tokenizer
// into multi-word phrases.
//
//   tokenizer       - tokenizer that supplies the individual words; it is
//                     stored and invoked later by tokenize().
//   min_length      - minimum number of words a phrase must contain before
//                     it is forwarded to the receiver (<= 1 also forwards
//                     each single word).
//   max_length      - maximum number of recent words kept, and therefore the
//                     maximum number of words in a phrase.
//   max_char_length - when > 0, phrases longer than this many characters are
//                     not forwarded.
PhrasingTokenizer::PhrasingTokenizer(AbstractTokenizer *tokenizer,
                                     int min_length,
                                     int max_length,
                                     int max_char_length)
: m_tokenizer(tokenizer),
  m_minLength(min_length),
  m_maxLength(max_length),
  m_maxCharLength(max_char_length)
{
    // No receiver is attached until tokenize() runs.  Start from a null
    // value so the member never holds an indeterminate pointer (assigned
    // here rather than in the initializer list so the result does not
    // depend on the member declaration order, which is not visible here).
    m_receiver = 0;
}

// Destructor.  Performs no explicit cleanup of its own; each member is
// released by its own destructor.
// NOTE(review): whether the wrapped tokenizer is freed depends on the
// declared type of m_tokenizer, which is not visible here -- confirm.
PhrasingTokenizer::~PhrasingTokenizer() {}

// Accepts the next word from the wrapped tokenizer and forwards it, together
// with the phrases that end in it, to the current receiver.
//
// m_words holds the most recent words, newest first.  For each call:
//   * the oldest words are dropped so that at most m_maxLength words remain
//     once the new one is added;
//   * the word itself is forwarded when m_minLength <= 1;
//   * phrases are grown by prepending progressively older words, joined with
//     SEPARATOR, and every phrase of at least m_minLength words is forwarded.
//     Growth stops as soon as a phrase exceeds m_maxCharLength characters
//     (only when that limit is > 0), since longer phrases would exceed it too.
//
// Example: with a minimum of 1 and a maximum of 3 words, receiving "a", "b"
// and then "c" forwards "c", "b c" and "a b c" on the third call.
//
//   token - the word just produced by the wrapped tokenizer.
void PhrasingTokenizer::receiveToken(const string &token)
{
    // Make room for the new word.  The emptiness check matters when
    // m_maxLength is 0: the size comparison is then always true and would
    // otherwise call pop_back() on an empty container.
    while (!m_words.empty() && m_words.size() >= m_maxLength) {
        m_words.pop_back();
    }
    m_words.insert(m_words.begin(), token);

    if (m_minLength <= 1) {
        m_receiver->receiveToken(token);
    }

    // m_words[0] is the token itself; start there and extend backwards
    // through the older words.
    string phrase(token);
    int phrase_length = 2;
    for (string::size_type i = 1; i < m_words.size(); ++i, ++phrase_length) {
        phrase.insert(0, SEPARATOR);
        phrase.insert(0, m_words[i]);
        if (m_maxCharLength > 0 && phrase.length() > m_maxCharLength) {
            break;
        }
        if (phrase_length >= m_minLength) {
            m_receiver->receiveToken(phrase);
        }
    }
}

// Tokenizes the characters supplied by `reader` and delivers the resulting
// words and phrases to `receiver`.
//
// This object hands itself to the wrapped tokenizer as the token receiver;
// presumably the wrapped tokenizer then reports each word through
// receiveToken(), which builds the phrases and forwards them to `receiver`.
//
//   receiver - destination for the words and phrases produced by this run.
//   reader   - character source passed through to the wrapped tokenizer.
void PhrasingTokenizer::tokenize(AbstractTokenReceiver *receiver,
                                 AbstractCharReader *reader)
{
    // Begin every run with an empty word history so that phrases never
    // include words left over from a previous call.
    m_words.clear();

    // Remember where receiveToken() should send its output.
    m_receiver = receiver;

    m_tokenizer->tokenize(this, reader);
}
