//                                               -*- C++ -*-
/**
 *  @file  FittingTest.cxx
 *  @brief FittingTest implements statistical fitting tests (BIC, Kolmogorov, Chi-squared) and best-model selection
 *
 *  (C) Copyright 2005-2007 EDF-EADS-Phimeca
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License.
 *
 *  This library is distributed in the hope that it will be useful
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 *  @author: $LastChangedBy: dutka $
 *  @date:   $LastChangedDate: 2008-10-31 11:52:04 +0100 (ven 31 oct 2008) $
 *  Id:      $Id: FittingTest.cxx 995 2008-10-31 10:52:04Z dutka $
 */
#include <cmath>
#include <fstream>
#include "FittingTest.hxx"
#include "NumericalPoint.hxx"
#include "Description.hxx"
#include "Path.hxx"
#include "ResourceMap.hxx"
#include "Log.hxx"
#include "DistFunc.hxx"

namespace OpenTURNS
{
  namespace Uncertainty
  {
    namespace StatTest
    {

      // File-local shorthands, grouped by originating namespace.
      // Base::Type: plain data containers
      typedef Base::Type::Description    Description;
      typedef Base::Type::NumericalPoint NumericalPoint;
      // Base::Common: logging, temporary file names and configuration lookup
      typedef Base::Common::Log          Log;
      typedef Base::Common::Path         Path;
      typedef Base::Common::ResourceMap  ResourceMap;
      // Distribution: special functions (used here for the Kolmogorov p-value)
      typedef Distribution::DistFunc     DistFunc;

      /* Default constructor. It has nothing to initialize and exists only because SWIG needs it. */
      FittingTest::FittingTest() {}

      /* Best model for a given numerical sample by BIC (factory version).
       * Each factory builds a distribution from the sample; every parameter of the
       * built distribution is counted as estimated when computing its BIC.
       * The candidate with the lowest BIC is returned; on ties the earliest one is kept.
       * Throws InternalException if the collection is empty. */
      FittingTest::Distribution FittingTest::BestModelBIC(const NumericalSample  & sample,
                                                          const FactoryCollection & factoryCollection)
      {
        const UnsignedLong candidateNumber(factoryCollection.getSize());
        if (candidateNumber == 0) throw InternalException(HERE) << "Error: no model given";
        // The first candidate is the initial reference
        Distribution winner(factoryCollection[0].buildImplementation(sample));
        NumericalScalar lowestBIC(BIC(sample, winner, winner.getParametersNumber()));
        for (UnsignedLong rank = 1; rank < candidateNumber; ++rank)
          {
            const Distribution candidate(factoryCollection[rank].buildImplementation(sample));
            const NumericalScalar candidateBIC(BIC(sample, candidate, candidate.getParametersNumber()));
            // Only a strictly lower BIC replaces the current winner
            if (!(candidateBIC < lowestBIC)) continue;
            lowestBIC = candidateBIC;
            winner = candidate;
          }
        return winner;
      }

      /* Best model for a given numerical sample by BIC (fully specified distributions).
       * The distributions are used as given; the BIC is computed with the default
       * number of estimated parameters declared with BIC().
       * The candidate with the lowest BIC is returned; on ties the earliest one is kept.
       * Throws InternalException if the collection is empty. */
      FittingTest::Distribution FittingTest::BestModelBIC(const NumericalSample  & sample,
                                                          const DistributionCollection & distributionCollection)
      {
        const UnsignedLong candidateNumber(distributionCollection.getSize());
        if (candidateNumber == 0) throw InternalException(HERE) << "Error: no model given";
        // The first candidate is the initial reference
        Distribution winner(distributionCollection[0]);
        NumericalScalar lowestBIC(BIC(sample, winner));
        for (UnsignedLong rank = 1; rank < candidateNumber; ++rank)
          {
            const Distribution candidate(distributionCollection[rank]);
            const NumericalScalar candidateBIC(BIC(sample, candidate));
            // Only a strictly lower BIC replaces the current winner
            if (!(candidateBIC < lowestBIC)) continue;
            lowestBIC = candidateBIC;
            winner = candidate;
          }
        return winner;
      }




      /* Best model for a given numerical sample by Kolmogorov (factory version).
       * Each factory builds a distribution from the sample, and the candidate with
       * the highest Kolmogorov p-value is returned; on ties the earliest one is kept.
       * Throws InternalException if the collection is empty. */
      FittingTest::Distribution FittingTest::BestModelKolmogorov(const NumericalSample  & sample,
                                                                 const FactoryCollection & factoryCollection)
      {
        const UnsignedLong candidateNumber(factoryCollection.getSize());
        if (candidateNumber == 0) throw InternalException(HERE) << "Error: no model given";
        // Only the p-value is used here, so any valid level in ]0, 1[ will do
        const NumericalScalar placeholderLevel(0.5);
        Distribution winner(factoryCollection[0].buildImplementation(sample));
        NumericalScalar highestPValue(Kolmogorov(sample, winner, placeholderLevel, winner.getParametersNumber()).getPValue());
        for (UnsignedLong rank = 1; rank < candidateNumber; ++rank)
          {
            const Distribution candidate(factoryCollection[rank].buildImplementation(sample));
            const NumericalScalar candidatePValue(Kolmogorov(sample, candidate, placeholderLevel, candidate.getParametersNumber()).getPValue());
            // Only a strictly higher p-value replaces the current winner
            if (!(candidatePValue > highestPValue)) continue;
            highestPValue = candidatePValue;
            winner = candidate;
          }
        return winner;
      }

      /* Best model for a given numerical sample by Kolmogorov (fully specified distributions).
       * The distributions are tested as given, using the default level and default
       * number of estimated parameters declared with Kolmogorov().
       * The candidate with the highest p-value is returned; on ties the earliest one is kept.
       * Throws InternalException if the collection is empty. */
      FittingTest::Distribution FittingTest::BestModelKolmogorov(const NumericalSample  & sample,
                                                                 const DistributionCollection & distributionCollection)
      {
        const UnsignedLong candidateNumber(distributionCollection.getSize());
        if (candidateNumber == 0) throw InternalException(HERE) << "Error: no model given";
        // The first candidate is the initial reference
        Distribution winner(distributionCollection[0]);
        NumericalScalar highestPValue(Kolmogorov(sample, winner).getPValue());
        for (UnsignedLong rank = 1; rank < candidateNumber; ++rank)
          {
            const Distribution candidate(distributionCollection[rank]);
            const NumericalScalar candidatePValue(Kolmogorov(sample, candidate).getPValue());
            // Only a strictly higher p-value replaces the current winner
            if (!(candidatePValue > highestPValue)) continue;
            highestPValue = candidatePValue;
            winner = candidate;
          }
        return winner;
      }


      /* Best model for a given numerical sample by ChiSquared (factory version).
       * Each factory builds a distribution from the sample, and the candidate with
       * the highest Chi-squared p-value is returned; on ties the earliest one is kept.
       * Throws InternalException if the collection is empty. */
      FittingTest::Distribution FittingTest::BestModelChiSquared(const NumericalSample  & sample,
                                                                 const FactoryCollection & factoryCollection)
      {
        const UnsignedLong candidateNumber(factoryCollection.getSize());
        if (candidateNumber == 0) throw InternalException(HERE) << "Error: no model given";
        // Only the p-value is read from the result; the level just has to be valid.
        // NOTE(review): presumably the p-value returned by the R script does not depend on it - confirm
        const NumericalScalar placeholderLevel(0.5);
        Distribution winner(factoryCollection[0].buildImplementation(sample));
        NumericalScalar highestPValue(ChiSquared(sample, winner, placeholderLevel, winner.getParametersNumber()).getPValue());
        for (UnsignedLong rank = 1; rank < candidateNumber; ++rank)
          {
            const Distribution candidate(factoryCollection[rank].buildImplementation(sample));
            const NumericalScalar candidatePValue(ChiSquared(sample, candidate, placeholderLevel, candidate.getParametersNumber()).getPValue());
            // Only a strictly higher p-value replaces the current winner
            if (!(candidatePValue > highestPValue)) continue;
            highestPValue = candidatePValue;
            winner = candidate;
          }
        return winner;
      }

      /* Best model for a given numerical sample by ChiSquared (fully specified distributions).
       * The distributions are tested as given, using the default level and default
       * number of estimated parameters declared with ChiSquared().
       * The candidate with the highest p-value is returned; on ties the earliest one is kept.
       * Throws InternalException if the collection is empty. */
      FittingTest::Distribution FittingTest::BestModelChiSquared(const NumericalSample  & sample,
                                                                 const DistributionCollection & distributionCollection)
      {
        const UnsignedLong candidateNumber(distributionCollection.getSize());
        if (candidateNumber == 0) throw InternalException(HERE) << "Error: no model given";
        // The first candidate is the initial reference
        Distribution winner(distributionCollection[0]);
        NumericalScalar highestPValue(ChiSquared(sample, winner).getPValue());
        for (UnsignedLong rank = 1; rank < candidateNumber; ++rank)
          {
            const Distribution candidate(distributionCollection[rank]);
            const NumericalScalar candidatePValue(ChiSquared(sample, candidate).getPValue());
            // Only a strictly higher p-value replaces the current winner
            if (!(candidatePValue > highestPValue)) continue;
            highestPValue = candidatePValue;
            winner = candidate;
          }
        return winner;
      }

      /* Bayesian Information Criterion computation.
       * Returns (-2 * logL + k * log(n)) / n, where logL is the sum over the sample of
       * the log of the distribution PDF, k is estimatedParameters and n is the sample size.
       * Throws InvalidArgumentException if the sample and distribution dimensions differ,
       * if the sample is empty, or if estimatedParameters exceeds the number of
       * parameters of the distribution. */
      NumericalScalar FittingTest::BIC(const NumericalSample & sample,
                                       const Distribution & distribution,
                                       const UnsignedLong estimatedParameters)
        throw(InvalidArgumentException)
      {
        if (sample.getDimension() != distribution.getDimension()) throw InvalidArgumentException(HERE) << "Error: the sample dimension and the distribution dimension must be equal";
        const UnsignedLong size(sample.getSize());
        // An empty sample would lead to log(0) and a division by zero below
        if (size == 0) throw InvalidArgumentException(HERE) << "Error: cannot compute the BIC of an empty sample";
        const UnsignedLong parametersNumber(distribution.getParametersNumber());
        if (parametersNumber < estimatedParameters) throw InvalidArgumentException(HERE) << "Error: the number of estimated parameters cannot exceed the number of parameters of the distribution";
        NumericalScalar logLikelihood(0.0);
        for (UnsignedLong i = 0; i < size; ++i)
          {
            logLikelihood += log(distribution.computePDF(sample[i]));
          }
        // Explicit conversion so that the floating-point overload of log is selected unambiguously
        return (-2.0 * logLikelihood + estimatedParameters * log(NumericalScalar(size))) / size;
      }

      /* Bayesian Information Criterion computation (factory version).
       * The factory builds a distribution from the sample, and all the parameters of
       * that distribution are counted as estimated. */
      NumericalScalar FittingTest::BIC(const NumericalSample & sample,
                                       const Factory & factory)
        throw(InvalidArgumentException)
      {
        const Distribution fitted(factory.buildImplementation(sample));
        const UnsignedLong estimated(fitted.getParametersNumber());
        return BIC(sample, fitted, estimated);
      }


      /* Kolmogorov test (factory version).
       * The factory builds a distribution from the sample, which is then tested against
       * the same sample with all its parameters counted as estimated.
       * Throws InvalidArgumentException if level is not in ]0, 1[, if the sample or the
       * built distribution is not 1D, or if the built distribution is not continuous. */
      FittingTest::TestResult FittingTest::Kolmogorov(const NumericalSample & sample,
                                                      const Factory & factory,
                                                      const NumericalScalar level)
        throw(InvalidArgumentException)
      {
        // Validate the inputs before paying for the estimation
        if ((level <= 0.0) || (level >= 1.0))
          {
            throw InvalidArgumentException(HERE) << "Error: level must be in ]0, 1[, here level=" << level;
          }
        if (sample.getDimension() != 1)
          {
            throw InvalidArgumentException(HERE) << "Error: Kolmogorov test works only with 1D samples";
          }
        const Distribution fitted(factory.buildImplementation(sample));
        if (!fitted.getImplementation()->isContinuous())
          {
            throw InvalidArgumentException(HERE) << "Error: Kolmogorov test can be applied only to a continuous distribution";
          }
        if (fitted.getDimension() != 1)
          {
            throw InvalidArgumentException(HERE) << "Error: Kolmogorov test works only with 1D distribution";
          }
        return Kolmogorov(sample, fitted, level, fitted.getParametersNumber());
      }


      /* Kolmogorov test of a sample against a given distribution.
       * The statistic is the largest absolute gap, over the sorted sample, between the
       * distribution CDF at each point and the empirical CDF just before (i / n) and
       * just after ((i + 1) / n) that point. The p-value comes from DistFunc::pKolmogorov
       * and the test is accepted when pValue > 1 - level.
       * A nonzero estimatedParameters only triggers an informational log message.
       * Throws InvalidArgumentException if level is not in ]0, 1[, if the sample or the
       * distribution is not 1D, or if the distribution is not continuous. */
      FittingTest::TestResult FittingTest::Kolmogorov(const NumericalSample & sample,
                                                      const Distribution & distribution,
                                                      const NumericalScalar level,
                                                      const UnsignedLong estimatedParameters)
        throw(InvalidArgumentException)
      {
        if ((level <= 0.0) || (level >= 1.0))
          {
            throw InvalidArgumentException(HERE) << "Error: level must be in ]0, 1[, here level=" << level;
          }
        if (sample.getDimension() != 1)
          {
            throw InvalidArgumentException(HERE) << "Error: Kolmogorov test works only with 1D samples";
          }
        if (!distribution.getImplementation()->isContinuous())
          {
            throw InvalidArgumentException(HERE) << "Error: Kolmogorov test can be applied only to a continuous distribution";
          }
        if (distribution.getDimension() != 1)
          {
            throw InvalidArgumentException(HERE) << "Error: Kolmogorov test works only with 1D distribution";
          }
        if (estimatedParameters > 0)
          {
            Log::Info("Warning: using Kolmogorov test for a distribution with estimated parameters will result in an overestimated pValue");
          }
        const NumericalSample orderedSample(sample.sort(0));
        const UnsignedLong sampleSize(sample.getSize());
        NumericalScalar statistic(0.0);
        for (UnsignedLong k = 0; k < sampleSize; ++k)
          {
            const NumericalScalar theoreticalCDF(distribution.computeCDF(orderedSample[k]));
            // Gaps with the empirical CDF on each side of its jump at the k-th sorted point
            const NumericalScalar gapBefore(fabs(NumericalScalar(k) / sampleSize - theoreticalCDF));
            const NumericalScalar gapAfter(fabs(theoreticalCDF - NumericalScalar(k + 1) / sampleSize));
            statistic = std::max(statistic, std::max(gapBefore, gapAfter));
          }
        // NOTE(review): the 'true' flag presumably selects the complementary (tail) probability - confirm in DistFunc
        const NumericalScalar pValue(DistFunc::pKolmogorov(sampleSize, statistic, true));
        const NumericalScalar threshold(1.0 - level);
        return TestResult(OSS() << "Kolmogorov" << distribution.getClassName(), (pValue > threshold), pValue, threshold);
      }

      /* Chi-squared test (factory version).
       * The factory builds a distribution from the sample, which is then tested against
       * the same sample with all its parameters counted as estimated.
       * Throws InvalidArgumentException if level is not in ]0, 1[, if the sample or the
       * built distribution is not 1D, or if the built distribution is continuous. */
      FittingTest::TestResult FittingTest::ChiSquared(const NumericalSample & sample,
                                                      const Factory & factory,
                                                      const NumericalScalar level)
        throw(InvalidArgumentException)
      {
        // Validate the inputs before paying for the estimation
        if ((level <= 0.0) || (level >= 1.0))
          {
            throw InvalidArgumentException(HERE) << "Error: level must be in ]0, 1[, here level=" << level;
          }
        if (sample.getDimension() != 1)
          {
            throw InvalidArgumentException(HERE) << "Error: ChiSquared test works only with 1D samples";
          }
        const Distribution fitted(factory.buildImplementation(sample));
        if (fitted.getImplementation()->isContinuous())
          {
            throw InvalidArgumentException(HERE) << "Error: Chi-squared test cannot be applied to a continuous distribution";
          }
        if (fitted.getDimension() != 1)
          {
            throw InvalidArgumentException(HERE) << "Error: ChiSquared test works only with 1D distribution";
          }
        return ChiSquared(sample, fitted, level, fitted.getParametersNumber());
      }


      /* Chi-squared test of a sample against a given distribution.
       * After validating the arguments, the computation is delegated to the R script
       * through RunRTest() with the test name "ChiSquared".
       * Throws InvalidArgumentException if level is not in ]0, 1[, if the sample or the
       * distribution is not 1D, or if the distribution is continuous. */
      FittingTest::TestResult FittingTest::ChiSquared(const NumericalSample & sample,
                                                      const Distribution & distribution,
                                                      const NumericalScalar level,
                                                      const UnsignedLong estimatedParameters)
        throw(InvalidArgumentException)
      {
        if ((level <= 0.0) || (level >= 1.0))
          {
            throw InvalidArgumentException(HERE) << "Error: level must be in ]0, 1[, here level=" << level;
          }
        if (sample.getDimension() != 1)
          {
            throw InvalidArgumentException(HERE) << "Error: ChiSquared test works only with 1D samples";
          }
        if (distribution.getImplementation()->isContinuous())
          {
            throw InvalidArgumentException(HERE) << "Error: Chi-squared test cannot be applied to a continuous distribution";
          }
        if (distribution.getDimension() != 1)
          {
            throw InvalidArgumentException(HERE) << "Error: ChiSquared test works only with 1D distribution";
          }
        return RunRTest(sample, distribution, level, estimatedParameters, "ChiSquared");
      }

      /* Generic invocation of a R script for testing a distribution against a sample.
       *
       * Steps:
       *  1. the sample is stored in a temporary data file;
       *  2. a temporary R command file is written; it loads rotRPackage, reads the data
       *     and calls computeTest<testName><distribution class name>(sample, parameters...,
       *     level, estimatedParameters), then writes res$test, res$testResult,
       *     res$threshold and res$pValue, one per line, into a temporary result file;
       *  3. the R executable named by the "R-executable-command" resource is run on it;
       *  4. the four values are read back and returned as a TestResult.
       *
       * The temporary files are removed afterwards only when OT_DEBUG_LEVEL < 6. When an
       * exception is thrown before that point, the files are left in place for inspection.
       *
       * Throws InternalException if a temporary file cannot be opened or removed, or if
       * the system command returns a nonzero code. */
      FittingTest::TestResult FittingTest::RunRTest(const NumericalSample & sample,
                                                    const Distribution & distribution,
                                                    const NumericalScalar level,
                                                    const UnsignedLong estimatedParameters,
                                                    const String & testName)
        throw(InternalException)
      {
        const String dataFileName(sample.storeToTemporaryFile());
        const String resultFileName(Path::BuildTemporaryFileName("RResult.txt.XXXXXX"));
        const String commandFileName(Path::BuildTemporaryFileName("RCmd.R.XXXXXX"));
        std::ofstream cmdFile(commandFileName.c_str(), std::ios::out);
        if (!cmdFile) throw InternalException(HERE) << "Error: cannot open file " << commandFileName << " for writing";
        // Write floating-point values with 17 significant digits so that the parameters and
        // the level reach R without loss (the default stream precision is only 6 digits)
        cmdFile.precision(17);
        // Fill-in the command file
        cmdFile << "library(rotRPackage)" << std::endl;
        cmdFile << "options(digits=17)" << std::endl;
        cmdFile << "options(warn=-1)" << std::endl;
        cmdFile << "sample <- data.matrix(read.table(\"" << dataFileName << "\"))" << std::endl;
        cmdFile << "res <- computeTest" << testName << distribution.getImplementation()->getClassName();
        cmdFile << "(sample, ";
        // Only the first parameter set of the distribution is forwarded to the R function
        const NumericalPoint parameters(distribution.getParametersCollection()[0]);
        const UnsignedLong parametersNumber(parameters.getDimension());
        for (UnsignedLong i = 0; i < parametersNumber; ++i)
          {
            cmdFile << parameters[i] << ", ";
          }
        cmdFile << level << ", " << estimatedParameters << ")" << std::endl;
        cmdFile << "f <- file(\"" << resultFileName << "\",\"wt\")" << std::endl;
        cmdFile << "cat(res$test, res$testResult, res$threshold, res$pValue, sep=\"\\n\", file=f)" << std::endl;
        cmdFile << "close(f)" << std::endl;
        cmdFile.close();

        OSS systemCommand;
        systemCommand << ResourceMap::GetInstance().get("R-executable-command") << " --no-save --silent < " << commandFileName << " 2>&1 > /dev/null";
        const int returnCode(system(String(systemCommand).c_str()));
        if (returnCode != 0) throw InternalException(HERE) << "Error: unable to execute the system command " << String(systemCommand) << " returned code is " << returnCode;
        // Parse result file
        std::ifstream resultFile(resultFileName.c_str(), std::ios::in);
        if (!resultFile) throw InternalException(HERE) << "Error: cannot open file " << resultFileName << " for reading";
        // Every value is initialized so that a failed read never leaves it indeterminate
        String testType;
        resultFile >> testType;
        Bool testResult = false;
        resultFile >> testResult;
        NumericalScalar pThreshold = -1.0;
        resultFile >> pThreshold;
        NumericalScalar pValue = -1.0;
        resultFile >> pValue;
        // Release the file before trying to remove it
        resultFile.close();

        // We only remove those files for low debug levels
#if OT_DEBUG_LEVEL < 6
        // Clean-up everything
        if (remove(dataFileName.c_str()) == -1) throw InternalException(HERE) << "Error: cannot remove file " << dataFileName;
        if (remove(resultFileName.c_str()) == -1) throw InternalException(HERE) << "Error: cannot remove file " << resultFileName;
        if (remove(commandFileName.c_str()) == -1) throw InternalException(HERE) << "Error: cannot remove file " << commandFileName;
#endif
        return TestResult(testType, testResult, pValue, pThreshold);
      }


    } // namespace StatTest
  } // namespace Uncertainty
} // namespace OpenTURNS
