/*
 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
/** PoS tagger main program.
 *
 *  @author Felipe Sánchez-Martínez - fsanchez@dlsi.ua.es
 */

#include <apertium/Tagger.H>

#include <apertium/HMM.H>
#include <apertium/HMM2.H>
#include <apertium/TaggerUtils.H>
#include <apertium/TSXReader.H>

#include <cstdio>
#include <fstream>
#include <string>
#include <libgen.h>
#include <locale.h>

#include <sys/types.h>
#include <unistd.h>
#include <getopt.h>

using namespace std;

/** Tells whether a C string is a non-empty sequence of decimal digits.
 *
 *  @param cad NUL-terminated string to inspect; may be NULL
 *  @return true if cad holds at least one character and every character
 *          is in the range '0'..'9'; false otherwise.  NULL, the empty
 *          string and signed values such as "-1" are all rejected.
 */
bool is_number (char *cad) {
  // A missing or empty argument is not a number.  Without this guard the
  // loop below would accept "" vacuously.
  if (!cad || *cad == '\0')
    return false;

  // Single pass over the string; stops at the first non-digit.
  for (const char *p = cad; *p != '\0'; ++p) {
    if (*p < '0' || *p > '9')
      return false;
  }
  return true;
}


/** Writes the usage message of the file-prefix based command line (the
 *  options handled by input()) to the standard error stream and then
 *  terminates the program with exit status 1.  It never returns.
 */
void
help() {
   cerr<<"Usage:\n"
       //<<"tagger { --train <n> [--supervised] | --retrain <n> | --tagger [--eval|--forhandtag] [--debug] } file < text\n\n"
       <<"tagger { --train <n> [--supervised] | --retrain <n> | --tagger [--eval] [--debug] } file < text\n\n"
       <<" --train|-t:      Performs <n> iterations of the Baum-Welch training algorithm (unsupervised)\n"
       <<" --supervised|-s: Initializes parameters against a hand-tagged text (supervised)\n"
       <<" --retrain|-r:    Retrains the model with <n> additional Baum-Welch iterations (unsupervised)\n"
       <<" --tagger|-g:     Tags input text by means of Viterbi algorithm\n"
       <<" --debug|-d:      Print error messages when tagging input text if any\n"
       <<" --eval|-e:       Evaluates tagging output\n"
       //<<" --forhandtag|-f: Produces a tagged text to be used as input for a hand-tagging program\n\n"
       << "Description of the files needed by each option:\n"
       <<"  file.dic\n"
       <<"    => Full expanded dictionary (It must exist if --train)\n"
       <<"  file.crp\n"
       // The option is spelled "--train"; the previous text read "-train".
       <<"    => Training text corpus (It must exist if --train or --retrain)\n"
       <<"  file.prob\n"
       <<"    => Transition and emission probabilities (It must exist if --tagger or --retrain)\n"
       <<"  file.amb\n"
       <<"    => Ambiguity classes (It must exist if --tagger or --retrain)\n"
       <<"  file.tagged\n"
       <<"    => Hand-tagged text corpus (It must exist if --supervised)\n"
       <<"  file.untagged\n"
       <<"    => Morphological analysis of the hand-tagged text corpus (It must exist if --supervised)\n"
       <<"  file.eval\n"
       <<"    => Hand-tagged text used for evaluation (It must exist if --eval)\n"
       <<"\n"
       <<"NOTE: It only reads from the standard input if --tagger\n";

   exit(1);
}

/**  Parameter filtering, file opening and error checking for the
  *  file-prefix based command line.
  *
  *  Parses the options in argv, builds the names of the working files by
  *  appending a fixed extension to the single non-option <file> argument
  *  and opens the files needed by the selected mode.  On a usage error a
  *  message is written to cerr and help() is called, which terminates the
  *  program.  Files that could not be opened are reported through
  *  file_name_error().
  *
  *  @param argc      argument count, as received by the program
  *  @param argv      argument vector, as received by the program
  *  @param fdic      receives the handle of <file>.dic (--train modes)
  *  @param fprob     opened on <file>.prob: truncated for writing when
  *                   training, read/write when retraining, read-only when
  *                   tagging
  *  @param famb      opened on <file>.amb: truncated for writing when
  *                   training, read-only otherwise
  *  @param fcrp      receives the handle of <file>.crp (training and
  *                   retraining modes)
  *  @param ftagged   receives the handle of <file>.tagged (--supervised)
  *  @param funtagged receives the handle of <file>.untagged (--supervised)
  *  @param feval     receives the handle of <file>.eval (--eval)
  *  @param nit       receives <n> from --train/--retrain; not modified in
  *                   the tagging modes
  *  @param debug     set to true if --debug/-d was given, false otherwise
  *  @return the tagger working mode
  */
int
input (int argc, char** argv, FILE **fdic, fstream& fprob, fstream& famb, FILE **fcrp,
                              FILE **ftagged, FILE **funtagged, FILE **feval, int& nit,
                              bool &debug) {
   static struct option long_options[] =
      {
         {"train",      required_argument, 0, 't'},
         {"supervised", no_argument,       0, 's'},
         {"retrain",    required_argument, 0, 'r'},
         {"tagger",     no_argument,       0, 'g'},
         {"eval",       no_argument,       0, 'e'},
         //{"forhandtag", no_argument,       0, 'f'},
         {"help",       no_argument,       0, 'h'},
         {"debug",      no_argument,       0, 'd'},
         {0, 0, 0, 0}
      };

   string filename, dicfile, probfile, ambfile, crpfile, taggedfile, untaggedfile, evalfile;
   int mode=UNKNOWN_MODE;

   int c;
   int option_index=0;
   debug=false;

   while (true) {
      // 'd' is part of the short-option string so that "-d", which the
      // usage message advertises, is accepted just like "--debug".
      //c=getopt_long(argc, argv, "dt:sr:gefh",long_options, &option_index);
      c=getopt_long(argc, argv, "dt:sr:geh",long_options, &option_index);
      if (c==-1)
         break;

      switch (c) {
         case 'd':
            debug=true;
            break;

         case 't':  // Training
            if (!is_number(optarg)) {
               cerr<<"Error: mandatory --train argument <n> must be zero or a positive integer\n";
               help();
            } else
               nit = atoi(optarg); // Number of iterations
            if (mode==UNKNOWN_MODE)
               mode=TRAIN_MODE;
            else {
               cerr<<"Error: --train <n> argument cannot be mixed with --retrain or --tagger arguments\n";
               help();
            }
            break;

         case 's':  // Only valid as a modifier of a previous --train
            if (mode==TRAIN_MODE)
               mode=TRAIN_SUPERVISED_MODE;
            else {
               cerr<<"Error: --supervised optional argument should only appear after --train <n> argument\n";
               help();
            }
            break;

         case 'r':  // Retraining
            if (!is_number(optarg)) {
               // This message used to name --train although it is the
               // --retrain argument that failed validation.
               cerr<<"Error: mandatory --retrain argument <n> must be zero or a positive integer\n";
               help();
            } else
               nit = atoi(optarg); // Number of iterations
            if (mode==UNKNOWN_MODE)
               mode=RETRAIN_MODE;
            else {
               cerr<<"Error: --retrain argument cannot be mixed with --train or --tagger arguments\n";
               help();
            }
            break;

         case 'g':  // Tagging
            if (mode==UNKNOWN_MODE)
               mode=TAGGER_MODE;
            else {
               cerr<<"Error: --tagger argument cannot be mixed with --train or --retrain arguments\n";
               help();
            }
            break;

         case 'e':  // Only valid as a modifier of a previous --tagger
            if (mode==TAGGER_MODE)
               mode=TAGGER_EVAL_MODE;
            else {
               cerr<<"Error: --eval optional argument should only appear after --tagger argument\n";
               help();
            }
            break;

         case 'f':  // Not reachable while 'f' is absent from the option tables
            if (mode==TAGGER_MODE)
               mode=TAGGER_FORHANDTAG_MODE;
            else {
               cerr<<"Error: --forhandtag optional argument should only appear after --tagger argument\n";
               help();
            }
            break;

         case 'h':
            help();
            break;

         default:
            //cerr<<"Error: getopt() returned the char code '"<<c<<"'\n";
            help();
      }
   }

   if (mode==UNKNOWN_MODE) {
      cerr<<"Error: Arguments missing\n";
      help();
   }

   if (optind>=argc) {
      cerr<<"Error: file argument is missing\n";
      help();
   } else {
      filename=argv[optind];
   }

   //cerr<<"mode="<<mode<<"; nit="<<nit<<"; filename="<<filename<<"\n";

   dicfile = filename+".dic";           // Full expanded dictionary
   crpfile = filename+".crp";           // Training corpus text (untagged)
   probfile = filename+".prob";         // Transition and emission probabilities
   ambfile = filename+".amb";           // Ambiguity classes
   taggedfile = filename+".tagged";     // Hand-tagged text corpus
   untaggedfile = filename+".untagged"; // Morphological analysis of the hand-tagged corpus text
   evalfile = filename+".eval";         // Tagged text for evaluation

   // Open everything the mode needs first; the results are checked below.
   switch(mode) {
      case TRAIN_MODE:
         *fdic = fopen(dicfile.c_str(), "r");
         *fcrp = fopen(crpfile.c_str(), "r");
         fprob.open(probfile.c_str(), ios::out | ios::trunc);
         famb.open(ambfile.c_str(), ios::out | ios::trunc);
         break;

      case TRAIN_SUPERVISED_MODE:
         *fdic = fopen(dicfile.c_str(), "r");
         *fcrp = fopen(crpfile.c_str(), "r");
         fprob.open(probfile.c_str(), ios::out | ios::trunc);
         famb.open(ambfile.c_str(), ios::out | ios::trunc);
         *ftagged = fopen(taggedfile.c_str(), "r");
         *funtagged = fopen(untaggedfile.c_str(), "r");
         break;

      case RETRAIN_MODE:
         *fcrp = fopen(crpfile.c_str(), "r");
         fprob.open(probfile.c_str(), ios::in | ios::out);
         famb.open(ambfile.c_str(), ios::in);
         break;

      case TAGGER_MODE:
         fprob.open(probfile.c_str(), ios::in);
         famb.open(ambfile.c_str(), ios::in);
         break;

      case TAGGER_EVAL_MODE:
         fprob.open(probfile.c_str(), ios::in);
         famb.open(ambfile.c_str(), ios::in);
         *feval= fopen(evalfile.c_str(), "r");
         break;

      case TAGGER_FORHANDTAG_MODE:
         fprob.open(probfile.c_str(), ios::in);
         famb.open(ambfile.c_str(), ios::in);
         break;

      default:
         cerr<<"Error: Unknown working mode mode\n";
         help();
   }

   if ((mode==TRAIN_MODE) || (mode==TRAIN_SUPERVISED_MODE))
      if (!*fdic) file_name_error(dicfile);

   if ((mode==TRAIN_MODE)||(mode==RETRAIN_MODE)||(mode==TRAIN_SUPERVISED_MODE))
      if (!*fcrp) file_name_error(crpfile);

   if (mode==TRAIN_SUPERVISED_MODE) {
      if (!*ftagged) file_name_error(taggedfile);
      if (!*funtagged) file_name_error(untaggedfile);
   }

   if (mode==TAGGER_EVAL_MODE)
      if (!*feval) file_name_error(evalfile);

   // The probability and ambiguity-class files are used by every mode.
   if (!fprob) file_name_error(probfile);
   if (!famb) file_name_error(ambfile);

   return mode;
}
  
/** Runs the tagger in the mode requested on the command line.
 *
 *  input() parses the arguments and opens the working files.  The
 *  training modes then build (or, when retraining, reload) the model, run
 *  the requested number of Baum-Welch iterations over the corpus and
 *  write the probabilities out.  The tagging modes load the model and tag
 *  the standard input.
 *
 *  @param argc          argument count, forwarded to input()
 *  @param argv          argument vector, forwarded to input()
 *  @param open_class    passed to the HMM constructor
 *  @param tag_index     passed to the HMM constructor; its "TAG_SENT"
 *                       entry is used as the end-of-sentence tag (looked
 *                       up with operator[], hence the non-const reference)
 *  @param constants     passed to the HMM constructor
 *  @param prefer_rules  passed to the HMM constructor
 *  @param forbid_rules  applied after initialising the probabilities
 *  @param enforce_rules applied after initialising the probabilities
 */
void do_main (int argc, char *argv[], set<TTag> const &open_class,
             map<string, int> &tag_index, ConstantManager const &constants,
             vector<string> const &prefer_rules,
             vector<TForbidRule> const &forbid_rules,
             vector<TEnforceAfterRule> const &enforce_rules) {
   fstream fprob, famb;
   FILE *feval = NULL, *fcrp = NULL, *fdic = NULL, *ftagged = NULL,
                 *funtagged = NULL;
   HMM hmm(open_class, tag_index, constants, prefer_rules);
   int n, nit = 0, mode;
   bool debug;

   //setlocale(LC_ALL,"es_ES");

   mode = input(argc, argv, &fdic, fprob, famb, &fcrp, &ftagged, &funtagged,
                &feval, nit, debug);

   hmm.set_debug(debug);

   hmm.set_eos(tag_index["TAG_SENT"]);

   if ((mode == TRAIN_MODE)||(mode==RETRAIN_MODE)||(mode==TRAIN_SUPERVISED_MODE)) {
      if ((mode==TRAIN_MODE)||(mode==TRAIN_SUPERVISED_MODE)) {
         cerr << "Calculating  ambiguity classes...\n";
         hmm.read_dictionary(fdic);

         cerr << "Writing ambiguity classes...\n";
         hmm.write_ambiguity_classes(famb);

         if (mode==TRAIN_MODE) {
            cerr << "Kupiec's initialization of transition and emission probabilities...\n";
            hmm.init_probabilities_kupiec(fcrp);
         } else { //TRAIN_SUPERVISED_MODE
            cerr << "Initializing transition and emission probabilities from a hand-tagged corpus...\n";
            hmm.init_probabilities_from_tagged_text(ftagged, funtagged);
         }
         cerr << "Applying forbid and enforce rules...\n";
         hmm.apply_rules(forbid_rules, enforce_rules);

      } else { //RETRAIN_MODE
         cerr << "Reading ambiguity classes...\n";
         hmm.read_ambiguity_classes(famb);

         cerr<< "Reading transition and emission probabilities...\n";
         hmm.read_probabilities(fprob);
         // Rewind so that the retrained probabilities overwrite the file
         // from its start.  clear() drops any eof/fail state left by the
         // read; the offset/direction form is used because ios::beg is a
         // seek direction, not a stream position.
         // NOTE(review): the file is rewritten in place without being
         // truncated - confirm the new data can never be shorter.
         fprob.clear();
         fprob.seekg(0, ios::beg);
      }

      cerr << "Training (Baum-Welch)...\n";
      for(n=0; n<nit; n++) {
         fseek(fcrp, 0, SEEK_SET);  // every iteration re-reads the whole corpus
         hmm.train(fcrp);
      }

      cerr << "Writing transition and emission probabilities...\n";
      hmm.write_probabilities(fprob);

   }  else { // TAGGER_MODE * TAGGER_EVAL_MODE * TAGGER_FORHANDTAG_MODE
      hmm.read_ambiguity_classes(famb);
      hmm.read_probabilities(fprob);
      if(mode==TAGGER_EVAL_MODE)
      {
         hmm.tagger(stdin, feval);
         hmm.print_evaluation();
      }
      else if (mode==TAGGER_FORHANDTAG_MODE){
         hmm.tagger(stdin, NULL, true);
      } else //TAGGER_MODE
         hmm.tagger(stdin);
   }
}

/** Parses the command line of the tagger.
 *
 *  Recognised options are -t/--train <n>, -s/--supervised <n>,
 *  -r/--retrain <n>, -g/--tagger, -e/--eval, -d/--debug and -h/--help.
 *  The iteration count is stored in nit, -d sets debug, and the remaining
 *  non-option arguments are appended to filenames in order.  Every usage
 *  error ends in help(), which terminates the program.
 *
 *  Number of file arguments accepted per mode:
 *    - TRAIN_SUPERVISED_MODE: 6
 *    - TRAIN_MODE:            4
 *    - RETRAIN_MODE:          2
 *    - TAGGER_MODE:           1, 2 or 3
 *  No count accepts TAGGER_EVAL_MODE, so -e currently always ends in
 *  help().
 *
 *  @param argc argument count
 *  @param argv argument vector
 *  @return the selected working mode
 */
int
Tagger::getMode(int argc, char *argv[])
{
  static struct option long_options[] =
  {
    {"train",      required_argument, 0, 't'},
    {"supervised", required_argument, 0, 's'},
    {"retrain",    required_argument, 0, 'r'},
    {"tagger",     no_argument,       0, 'g'},
    {"eval",       no_argument,       0, 'e'},
    //{"forhandtag", no_argument,       0, 'f'},
    {"help",       no_argument,       0, 'h'},
    {"debug",      no_argument,       0, 'd'},
    {0, 0, 0, 0}
  };

  int mode=UNKNOWN_MODE;

  int c;
  int option_index=0;

  while (true) {
    //c=getopt_long(argc, argv, "t:sr:gefh",long_options, &option_index);
    c=getopt_long(argc, argv, "dt:s:r:geh",long_options, &option_index);
    if (c==-1)
      break;

    switch (c)
    {
      case 'd':
        debug=true;
        break;

      case 't':  //Training
        if(!is_number(optarg))
        {
          cerr<<"Error: mandatory --train argument <n> must be zero or a positive integer\n";
          help();
        }
        else
        {
          nit = atoi(optarg); //Number of iterations
        }
        if(mode==UNKNOWN_MODE)
        {
          mode=TRAIN_MODE;
        }
        else
        {
          cerr<<"Error: --train <n> argument cannot be mixed with --retrain or --tagger arguments\n";
          help();
        }
        break;

      case 's':  //Supervised training
        if(!is_number(optarg))
        {
          cerr<<"Error: mandatory --supervised argument <n> must be zero or a positive integer\n";
          help();
        }
        else
        {
          nit = atoi(optarg); //Number of iterations
        }

        if(mode==UNKNOWN_MODE)
        {
          mode=TRAIN_SUPERVISED_MODE;
        }
        else
        {
          // Here --supervised selects a mode of its own, so the conflict
          // is reported like those of the other mode options.
          cerr<<"Error: --supervised <n> argument cannot be mixed with --train, --retrain or --tagger arguments\n";
          help();
        }
        break;

      case 'r':  //Retraining
        if(!is_number(optarg))
        {
          // This message used to name --train although it is the
          // --retrain argument that failed validation.
          cerr<<"Error: mandatory --retrain argument <n> must be zero or a positive integer\n";
          help();
        }
        else
        {
          nit = atoi(optarg); //Number of iterations
        }
        if(mode==UNKNOWN_MODE)
        {
          mode=RETRAIN_MODE;
        }
        else
        {
          cerr<<"Error: --retrain argument cannot be mixed with --train or --tagger arguments\n";
          help();
        }
        break;

      case 'g':  //Tagging
        if(mode==UNKNOWN_MODE)
        {
          mode=TAGGER_MODE;
        }
        else
        {
          cerr<<"Error: --tagger argument cannot be mixed with --train or --retrain arguments\n";
          help();
        }
        break;

      case 'e':
        if(mode==TAGGER_MODE)
        {
          mode=TAGGER_EVAL_MODE;
        }
        else
        {
          cerr<<"Error: --eval optional argument should only appear after --tagger argument\n";
          help();
        }
        break;

      case 'f':  //Not reachable while 'f' is absent from the option tables
        if(mode==TAGGER_MODE)
        {
          mode=TAGGER_FORHANDTAG_MODE;
        }
        else
        {
          cerr<<"Error: --forhandtag optional argument should only appear after --tagger argument\n";
          help();
        }
        break;

      case 'h':
        help();
        break;

      default:
        //cerr<<"Error: getopt() returned the char code '"<<c<<"'\n";
        help();
        break;
    }
  }

  if(mode==UNKNOWN_MODE)
  {
    cerr<<"Error: Arguments missing\n";
    help();
  }

  // Each mode takes a fixed number of file arguments (see above).
  switch(argc-optind)
  {
    case 6:
      if(mode != TRAIN_SUPERVISED_MODE)
      {
        help();
      }
      break;

    case 4:
      if(mode != TRAIN_MODE)
      {
        help();
      }
      break;

    case 3:
      if(mode != TAGGER_MODE)
      {
        help();
      }
      break;

    case 2:
      if(mode != RETRAIN_MODE && mode != TAGGER_MODE)
      {
        help();
      }
      break;

    case 1:
      if(mode != TAGGER_MODE)
      {
        help();
      }
      break;

    default:
      help();
      break;
  }

  for(int i = optind; i != argc; i++)
  {
    filenames.push_back(argv[i]);
  }

  return mode;
}

/** Creates a tagger with debugging disabled and an iteration count of
 *  zero.  getMode() overrides these when the corresponding options are
 *  given on the command line.
 */
Tagger::Tagger()
{
  debug = false;
  // getMode() only assigns nit for -t, -s and -r; give it a defined value
  // for every other path.
  nit = 0;
}

/** Entry point of the tagger object: parses the command line and runs
 *  the operation it selects.
 *
 *  @param argc argument count
 *  @param argv argument vector; argv[0] is kept as the program name
 */
void
Tagger::main(int argc, char *argv[])
{
  // Stored before parsing because help(), which getMode() may call,
  // prints the program name.
  name = argv[0];

  int const selected = getMode(argc, argv);

  if(selected == TRAIN_MODE)
  {
    train();
  }
  else if(selected == TRAIN_SUPERVISED_MODE)
  {
    trainSupervised();
  }
  else if(selected == RETRAIN_MODE)
  {
    retrain();
  }
  else if(selected == TAGGER_MODE)
  {
    tagger();
  }
  else
  {
    // Any other mode has no operation associated with it here.
    cerr<<"Error: Unknown working mode mode\n";
    help();
  }
}

void
Tagger::tagger()
{
  FILE *ftdata = fopen(filenames[0].c_str(), "r");
  if(ferror(ftdata))
  {
    filerror(filenames[0]);
  }
  TaggerData td;
  td.read(ftdata);
  fclose(ftdata);
  
  HMM2 hmm(&td);
  
  if(filenames.size() == 1)
  {
    hmm.tagger(stdin, stdout);
  }
  else
  {
    FILE *finput = fopen(filenames[1].c_str(), "r");
    if(ferror(finput))
    {
      filerror(filenames[1]);
    }
    if(filenames.size() == 2)
    {
      hmm.tagger(finput, stdout);
    }
    else
    {
      FILE *foutput = fopen(filenames[2].c_str(), "w");
      if(ferror(foutput))
      {
        filerror(filenames[2]);
      }
      hmm.tagger(finput, foutput);
      fclose(foutput);
    }
    fclose(finput);
  }
}

/** Reports that a file could not be opened and prints the usage message
 *  through help(), which terminates the program.
 *
 *  @param filename name of the file that failed to open
 */
void
Tagger::filerror(string const &filename)
{
  // Name the file that actually failed.  The function is called for
  // every kind of file, so the message does not say "dictionary".
  cerr << "Error: cannot open file '" << filename << "'\n";
  help();
}

/** Unsupervised training.
 *
 *  File arguments, in order: filenames[0] dictionary, filenames[1]
 *  training corpus, filenames[2] tagger specification read by TSXReader,
 *  filenames[3] output written by TSXReader::write().  The probabilities
 *  are initialised with init_probabilities_kupiec() and then trained for
 *  nit iterations over the corpus.  A file that cannot be opened is
 *  reported through filerror().
 */
void
Tagger::train()
{
  TSXReader treader;
  treader.read(filenames[2]);
  HMM2 hmm(&(treader.getTaggerData()));
  hmm.set_debug(debug);
  hmm.set_eos((treader.getTaggerData().getTagIndex())["TAG_SENT"]);
  TaggerWord::setArrayTags(treader.getTaggerData().getArrayTags());

  cerr << "Calculating ambiguity classes...\n";
  // fopen() signals failure by returning NULL; ferror() must not be
  // called on a NULL stream, so the pointer itself is tested.
  FILE *fdic = fopen(filenames[0].c_str(), "r");
  if(!fdic)
  {
    filerror(filenames[0]);
  }
  hmm.read_dictionary(fdic);

  cerr << "Kupiec's initialization of transition and emission probabilities...\n";
  FILE *fcrp = fopen(filenames[1].c_str(), "r");
  if(!fcrp)
  {
    filerror(filenames[1]);
  }
  hmm.init_probabilities_kupiec(fcrp);

  cerr << "Applying forbid and enforce rules...\n";
  hmm.apply_rules();

  cerr << "Training (Baum-Welch)...\n";
  for(int i=0; i < nit; i++)
  {
    fseek(fcrp, 0, SEEK_SET);  // every iteration re-reads the whole corpus
    hmm.train(fcrp);
  }

  fclose(fdic);
  fclose(fcrp);
  treader.write(filenames[3]);
}

/** Supervised training.
 *
 *  File arguments, in order: filenames[0] dictionary, filenames[1]
 *  training corpus, filenames[2] tagger specification read by TSXReader,
 *  filenames[3] output written by TSXReader::write(), filenames[4]
 *  hand-tagged corpus, filenames[5] its untagged counterpart.  The
 *  probabilities are initialised from the hand-tagged text and then
 *  trained for nit iterations over the corpus.  A file that cannot be
 *  opened is reported through filerror().
 */
void
Tagger::trainSupervised()
{
  TSXReader treader;
  treader.read(filenames[2]);
  HMM2 hmm(&(treader.getTaggerData()));
  hmm.set_debug(debug);
  hmm.set_eos((treader.getTaggerData().getTagIndex())["TAG_SENT"]);
  TaggerWord::setArrayTags(treader.getTaggerData().getArrayTags());

  cerr << "Calculating ambiguity classes...\n";
  // fopen() signals failure by returning NULL; ferror() must not be
  // called on a NULL stream, so the pointers themselves are tested.
  FILE *fdic = fopen(filenames[0].c_str(), "r");
  if(!fdic)
  {
    filerror(filenames[0]);
  }
  hmm.read_dictionary(fdic);

  // Each corpus file is checked on its own so that the one that is
  // actually missing gets reported and fclose() never sees NULL.
  FILE *ftagged = fopen(filenames[4].c_str(), "r");
  if(!ftagged)
  {
    filerror(filenames[4]);
  }
  FILE *funtagged = fopen(filenames[5].c_str(), "r");
  if(!funtagged)
  {
    filerror(filenames[5]);
  }
  // No "Kupiec's initialization" message here: this mode initialises
  // from the hand-tagged text only.
  cerr << "Initializing transition and emission probabilities from a hand-tagged corpus...\n";
  hmm.init_probabilities_from_tagged_text(ftagged, funtagged);
  fclose(ftagged);
  fclose(funtagged);

  cerr << "Applying forbid and enforce rules...\n";
  hmm.apply_rules();

  cerr << "Training (Baum-Welch)...\n";
  FILE *fcrp = fopen(filenames[1].c_str(), "r");
  if(!fcrp)
  {
    filerror(filenames[1]);
  }
  for(int i=0; i < nit; i++)
  {
    fseek(fcrp, 0, SEEK_SET);  // every iteration re-reads the whole corpus
    hmm.train(fcrp);
  }

  fclose(fdic);
  fclose(fcrp);
  treader.write(filenames[3]);
}

/** Retrains an existing model.
 *
 *  filenames[0] is the training corpus and filenames[1] the tagger data
 *  file, which is read, trained for nit further iterations and then
 *  written back to the same path.  A file that cannot be opened is
 *  reported through filerror().
 */
void
Tagger::retrain()
{
  TaggerData td;
  // fopen() signals failure by returning NULL; ferror() must not be
  // called on a NULL stream, so the pointers themselves are tested.
  FILE *ftdata = fopen(filenames[1].c_str(), "r");
  if(!ftdata)
  {
    filerror(filenames[1]);
  }
  td.read(ftdata);
  fclose(ftdata);

  HMM2 hmm(&td);
  hmm.set_debug(debug);
  hmm.set_eos((td.getTagIndex())["TAG_SENT"]);
  TaggerWord::setArrayTags(td.getArrayTags());

  FILE *fcrp = fopen(filenames[0].c_str(), "r");
  if(!fcrp)
  {
    filerror(filenames[0]);
  }
  cerr << "Training (Baum-Welch)...\n";
  for(int i=0; i < nit; i++)
  {
    fseek(fcrp, 0, SEEK_SET);  // every iteration re-reads the whole corpus
    hmm.train(fcrp);
  }
  fclose(fcrp);

  // Reopen the data file for writing; "w" truncates it before td is
  // written back.
  ftdata = fopen(filenames[1].c_str(), "w");
  if(!ftdata)
  {
    filerror(filenames[1]);
  }
  td.write(ftdata);
  fclose(ftdata);
}

/** Writes the usage message to the standard error stream and terminates
 *  the program with EXIT_FAILURE.  It never returns.
 *
 *  The program is referred to by the base name of `name`, which main()
 *  sets from argv[0].
 */
void
Tagger::help()
{
  // basename() is allowed to modify its argument and to return a pointer
  // to storage it owns, so it is given a private NUL-terminated copy,
  // called once, and its result is copied immediately.
  vector<char> buffer(name.begin(), name.end());
  buffer.push_back('\0');
  string const progname = basename(&buffer[0]);

  ostream &out = cerr;
  out << progname << ": HMM part-of-speech tagging and training program\n";
  // Short options take their argument as "-t n"; getopt would read
  // "-t=n" as the argument "=n", which is then rejected as a number.
  out << "GENERIC USAGE: " << progname << " [-d] <OPTION> [PARAM] [FILES]\n";
  out << "USAGE: " << progname << " [-d] -t n DIC CRP TSX TAGGER_DATA\n";
  out << "       " << progname << " [-d] -s n DIC CRP TSX TAGGER_DATA HTAG UNTAG\n";
  out << "       " << progname << " [-d] -r n CRP TAGGER_DATA\n";
  out << "       " << progname << " [-d] -g TAGGER_DATA [INPUT [OUTPUT]]\n";
  out << "\n";
  out << "Where OPTIONS are:\n";
  out << "  -t, --train=n:       performs n iterations of the Baum-Welch training\n";
  out << "                       algorithm (unsupervised)\n";
  out << "  -s, --supervised=n:  initializes parameters against a hand-tagged text\n";
  out << "                       (supervised), and trains it with n iterations\n";
  out << "  -r, --retrain=n:     retrains the model with n additional Baum-Welch\n";
  out << "                       iterations (unsupervised)\n";
  out << "  -g, --tagger:        tags input text by means of Viterbi algorithm\n";
  out << "  -d, --debug:         print error messages when tagging input text\n";
  out << "\n";
  out << "And FILES are:\n";
  out << "  DIC:         full expanded dictionary file\n";
  out << "  CRP:         training text corpus file\n";
  out << "  TSX:         tagger specification file, in XML format\n";
  out << "  TAGGER_DATA: tagger data file, built in the training and used while\n";
  out << "               tagging\n";
  out << "  HTAG:        hand-tagged text corpus\n";
  out << "  UNTAG:       untagged text corpus, morphological analysis of HTAG\n";
  out << "               corpus to use both jointly with -s option\n";
  out << "  INPUT:       input file, stdin by default\n";
  out << "  OUTPUT:      output file, stdout by default\n";
  exit(EXIT_FAILURE);
}
