/*!**************************************************************************

  module      : Data_PrimTreeStatistic.hpp

  -------------------------------------------------------------------------

  responsible : TorstenS

  special area: File statistic
  description : Implementation of table statistics and column statistics.

                This class provides statistical information about a
                given file. To reduce the runtime of the methods, the
                functionality is implemented twice: on the one hand an
                exact calculation, which will be expensive for huge files,
                and on the other hand an estimation, which will be cheaper
                but not exact.

  last changed: 2002-04-24
  see also    :

  -------------------------------------------------------------------------

  copyright:    (c) 2001-2004 SAP AG


    ========== licence begin  GPL
    Copyright (c) 2001-2005 SAP AG

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
    ========== licence end




*****************************************************************************/


#ifndef DATA_PRIM_TREE_STATISTIC_HPP
#define DATA_PRIM_TREE_STATISTIC_HPP



/*===========================================================================*
 *  INCLUDES                                                                 *
 *===========================================================================*/

#include "ggg00.h"
#include "gbd00.h"

#include "KernelCommon/Kernel_Common.hpp"
#include "DataAccess/Data_Types.hpp"
#include "DataAccess/Data_PrimPage.hpp"
#include "KernelCommon/Kernel_TableStatistic.hpp"


/*===========================================================================*
 *  DEFINES                                                                  *
 *===========================================================================*/


/*===========================================================================*
 *  MACROS                                                                   *
 *===========================================================================*/


/*===========================================================================*
 *  CLASSES, STRUCTURES, TYPES, UNIONS ...                                   *
 *===========================================================================*/


// Forward declaration of the record type that
// Data_PrimTreeStatistic::ColumnStatisticForSinglePage() takes by reference.
// NOTE(review): the declaration sits in an unnamed namespace inside a header,
// so every translation unit that includes this header gets its own distinct
// TempStatisticRecord type. Presumably only the implementation file of
// Data_PrimTreeStatistic defines and uses it -- confirm before relying on
// this type from any other translation unit.
namespace {
    class TempStatisticRecord;
}

/*!*****************************************************************************

   class: Data_PrimTreeStatistic

  description : Implementation of table statistics and column statistics.
                This class provides statistical information about a
                given file. To reduce the runtime of the methods, the
                functionality is implemented twice: on the one hand an
                exact calculation, which will be expensive for huge files,
                and on the other hand an estimation, which will be cheaper
                but not exact.
              
*******************************************************************************/

class Data_PrimTreeStatistic
{
public:

    /* -----------------------------------------------------------------------*/
    /*! chapter: Constructors and initialization                              */
    /* -----------------------------------------------------------------------*/

    /*!----------------------------------------------------------------------
    function:     Data_PrimTreeStatistic()
    description:  Constructs a handle for primary tree statistics.
                  The object only stores references: to the given current,
                  to the transaction context reached via current.curr_trans
                  and to the error field trError_gg00 of that context.
                  Therefore current and its transaction context have to
                  outlive this object, and current.curr_trans has to be
                  valid at construction time because it is dereferenced here.
                  NOTE(review): the constructor is not declared explicit, so
                  a tbd_current_tree converts implicitly into this class.
                  Consider adding explicit once it is confirmed that no
                  caller depends on the implicit conversion.
    arguments:    current [in/out] identifier for the file to be handled
    return value: none
    -----------------------------------------------------------------------*/

    Data_PrimTreeStatistic( tbd_current_tree    &current )
    :
    m_Current( current ),
    m_Trans( *current.curr_trans ),
    m_TrError( current.curr_trans->trError_gg00 )
    {}

    /* -----------------------------------------------------------------------*/
    /*! endchapter: constructors and initialization                           */
    /* -----------------------------------------------------------------------*/

    /* -----------------------------------------------------------------------*/
    /*! chapter: Determine statistics                                         */
    /* -----------------------------------------------------------------------*/

    /*!-------------------------------------------------------------------------
    function:     CalculateColumnStatistic()
    description:  This method is used to determine the exact number of
                  distinct values in a given file. Furthermore the number of
                  records and the number of leaves are counted. Note that this
                  method reads all leaf nodes of the given file, which will
                  take time if the given file is not small. The columns to be
                  examined concerning the distinct values are described by
                  the parameter pColumnDescription.
    arguments:    tempCurrent [in]          temporary file to hold the distinct values
                  columnCount [in]          number of column descriptions within
                                            pColumnDescription
                  columnFirstPosition [in]  position of the first column description
                                            within pColumnDescription
                  columnLastPosition [in]   position of the last column description
                                            within pColumnDescription
                  pColumnDescription [in]   column descriptions
                  numberOfLeaves [out]      number of counted leaf nodes
                  numberOfRecords [out]     number of counted records
                  distinctValues [out]      contains the number of distinct values
                                            for every given column.
    return value: none
    --------------------------------------------------------------------------*/

    void CalculateColumnStatistic(
        tbd_current_tree            &tempCurrent,
        const SAPDB_Int2            columnCount,
        SAPDB_Int2                  columnFirstPosition,
        SAPDB_Int2                  columnLastPosition,
        const tgg00_StEntryAddr     pColumnDescription,
        Data_PageCount              &numberOfLeaves,
        Data_PageCount              &numberOfRecords,
        tgg00_ColumnDistinctValues  &distinctValues );

    /*!-------------------------------------------------------------------------
    function:     SampleColumnStatistic()
    description:  This method is used to estimate the number of distinct values
                  in a given file. Furthermore the number of records and the
                  number of leaves are estimated. This method reads the leaf
                  pages step by step, whereby the step width is given by the
                  parameter stepWidth. The columns to be examined concerning
                  the distinct values are described by the parameter
                  pColumnDescription.
    arguments:    tempCurrent         [in]  temporary file to hold the distinct values
                  stepWidth           [in]  step width used to go from one leaf to another
                  columnCount         [in]  number of column descriptions within
                                            pColumnDescription
                  columnFirstPosition [in]  position of the first column description
                                            within pColumnDescription
                  columnLastPosition  [in]  position of the last column description
                                            within pColumnDescription
                  pColumnDescription  [in]  column descriptions
                  numberOfLeaves     [out]  number of leaf nodes (estimated)
                  numberOfRecords    [out]  number of records (estimated)
                  distinctValues     [out]  contains the number of distinct values
                                            for every given column.
    return value: none
    --------------------------------------------------------------------------*/

    void SampleColumnStatistic(
        tbd_current_tree            &tempCurrent,
        const SAPDB_UInt            stepWidth,
        const SAPDB_Int2            columnCount,
        SAPDB_Int2                  columnFirstPosition,
        SAPDB_Int2                  columnLastPosition,
        const tgg00_StEntryAddr     pColumnDescription,
        Data_PageCount              &numberOfLeaves,
        Data_PageCount              &numberOfRecords,
        tgg00_ColumnDistinctValues  &distinctValues );

    /*!-------------------------------------------------------------------------
    function:     SampleNewColumnStatistic()
    description:  This method is used to estimate the number of distinct values
                  in a given file. Furthermore the number of records and the
                  number of leaves are estimated. This method reads as many
                  leaf nodes as given in parameter leafNodesToSample. Note
                  that the estimation algorithm used (by Thomas Koetter)
                  expects that no leaf is examined twice during the sampling.
                  The columns to be examined concerning the distinct values
                  are described by the parameter pColumnDescription.
    arguments:    tempCurrent [in]          temporary file to hold the distinct values
                  leafNodesToSample [in]    number of leaf nodes to be looked at.
                  columnCount [in]          number of column descriptions within
                                            pColumnDescription
                  columnFirstPosition [in]  position of the first column description
                                            within pColumnDescription
                  columnLastPosition [in]   position of the last column description
                                            within pColumnDescription
                  pColumnDescription [in]   column descriptions
                  numberOfLeaves [out]      number of leaf nodes (estimated)
                  numberOfRecords [out]     number of records (estimated)
                  distinctValues [out]      contains the number of distinct values
                                            for every given column.
    return value: none
    --------------------------------------------------------------------------*/

    void SampleNewColumnStatistic(
        tbd_current_tree            &tempCurrent,
        const Data_PageCount        leafNodesToSample,
        const SAPDB_Int2            columnCount,
        SAPDB_Int2                  columnFirstPosition,
        SAPDB_Int2                  columnLastPosition,
        const tgg00_StEntryAddr     pColumnDescription,
        Data_PageCount              &numberOfLeaves,
        Data_PageCount              &numberOfRecords,
        tgg00_ColumnDistinctValues  &distinctValues );

    /*!-------------------------------------------------------------------------
    function:     CalculateRecordsAndLeaves()
    description:  This method is used to calculate the exact number of leaves
                  and records within the given file. Note that this method reads
                  all leaf nodes of the given file, which will take time if the
                  given file is not small.
    arguments:    numberOfLeaves [out]  number of counted leaf nodes
                  numberOfRecords [out] number of counted records
    return value: none
    --------------------------------------------------------------------------*/

    void CalculateRecordsAndLeaves(
        Data_PageCount  &numberOfLeaves,
        Data_PageCount  &numberOfRecords );

    /*!-------------------------------------------------------------------------
    function:     SampleRecordsAndLeaves()
    description:  This method is used to estimate the number of leaves and
                  records within the given file. This method reads as many
                  leaf nodes as given in parameter leafNodesToSample. Note
                  that the estimation algorithm used (by Thomas Koetter)
                  expects that no leaf is examined twice during the sampling.
    arguments:    leafNodesToSample [in] number of leaf nodes to be looked at.
                  numberOfLeaves [out]   number of leaf nodes (estimated)
                  numberOfRecords [out]  number of records (estimated)
    return value: none
    --------------------------------------------------------------------------*/

    void SampleRecordsAndLeaves(
        const Data_PageCount    leafNodesToSample,
        Data_PageCount          &numberOfLeaves,
        Data_PageCount          &numberOfRecords );

    /*!-------------------------------------------------------------------------
    function:     CalculateTableStatistic()
    description:  This method is used to determine the table statistics for
                  the given file. Note that this method reads all nodes
                  (leaf and non leaf) of the given file, which will take time
                  if the given file is not small.
    arguments:    numberOfLongColumns [in] number of defined long columns
                  tableStatistic [in/out]  calculated table statistics
    return value: none
    --------------------------------------------------------------------------*/

    void CalculateTableStatistic(
        const SAPDB_UInt        numberOfLongColumns,
        Kernel_TableStatistic   &tableStatistic );

    /*!-------------------------------------------------------------------------
    function:     EstimateDistinctValuesPerColumn()
    description:  Static helper, callable without an instance of this class.
                  The implementation is not part of this header. Judging by
                  the name and the parameters it presumably extrapolates the
                  number of distinct values found in a sample to the whole
                  file -- TODO confirm against the implementation.
    arguments:    estimatedNumberOfRecords [in] presumably the estimated number
                                                of records of the whole file
                  sampledNumberOfRecords [in]   presumably the number of records
                                                looked at during the sampling
                  distinctInSample [in]         presumably the number of distinct
                                                values found within the sample
    return value: SAPDB_Int4; presumably the estimated number of distinct values
    --------------------------------------------------------------------------*/

    static SAPDB_Int4 EstimateDistinctValuesPerColumn(
        const SAPDB_Real8   estimatedNumberOfRecords,
        const SAPDB_Real8   sampledNumberOfRecords,
        const SAPDB_Real8   distinctInSample );

    /* -----------------------------------------------------------------------*/
    /*! endchapter: Determine statistics                                      */
    /* -----------------------------------------------------------------------*/

private:

    // Copy construction and assignment are not supported. Both operations
    // are declared private, so they are not accessible to users of this class.

    Data_PrimTreeStatistic& operator=( const Data_PrimTreeStatistic &pts );

    Data_PrimTreeStatistic( const Data_PrimTreeStatistic &pts );


    // The following private helpers are only declared here; their
    // implementation is not part of this header. The remarks about their
    // purpose are derived from the names and signatures only and should be
    // confirmed against the implementation file.

    // Works on one page (primPage) together with the temporary file
    // (tempCurrent), a temporary record (tempRecord) and the column
    // descriptions; presumably determines the distinct values of the
    // described columns for this single page. The meaning of the returned
    // SAPDB_Bool is not visible here -- TODO confirm.
    SAPDB_Bool ColumnStatisticForSinglePage(
        const SAPDB_Bool1            bCountDuplicates,
        tbd_current_tree             &tempCurrent,
        TempStatisticRecord          &tempRecord,
        Data_PrimPage                &primPage,
        const SAPDB_Int2             columnCount,
        const SAPDB_Int2             columnFirstPosition,
        const SAPDB_Int2             columnLastPosition,
        const tgg00_StEntryAddr      pColumnDescription,
        tgg00_ColumnDistinctValues   &distinctValues );

    // Presumably adds the contribution of one leaf page to tableStatistic.
    void TableStatisticForSingleLeafPage(
        Data_PrimPage           &primPage,
        Kernel_TableStatistic   &tableStatistic );

    // Presumably the variant of TableStatisticForSingleLeafPage() for files
    // with long columns; numberOfLongColumns is passed in addition.
    void TableStatisticForSingleLeafPageWithLongColumns(
        const SAPDB_UInt        numberOfLongColumns,
        Data_PrimPage           &primPage,
        Kernel_TableStatistic   &tableStatistic );

    // Presumably adds the contribution of one non leaf page to tableStatistic.
    void TableStatisticForSingleNonLeafPage(
        Data_PrimPage           &primPage,
        Kernel_TableStatistic   &tableStatistic );

    // Presumably the distinct value estimation belonging to
    // SampleNewColumnStatistic(); works on integer counters and on the
    // temporary file tempCurrent -- TODO confirm.
    void NewEstimateNumberOfDistinctValues(
        const SAPDB_Int8             estimatedNumberOfLeafPages,
        const SAPDB_Int8             estimatedNumberOfTableRecords,
        const SAPDB_Int8             sampleSize,
        const SAPDB_Int2             columnCount,
        tbd_current_tree             &tempCurrent,
        tgg00_ColumnDistinctValues   &distinctValues );

    // Presumably the distinct value estimation belonging to
    // SampleColumnStatistic(); updates distinctValues for columnCount
    // columns -- TODO confirm.
    void EstimateNumberOfDistinctValues(
        const SAPDB_Real8            estimatedNumberOfRecords,
        const SAPDB_Real8            sampledNumberOfRecords,
        const SAPDB_Int2             columnCount,
        tgg00_ColumnDistinctValues   &distinctValues );

    // Presumably reports whether numPagesRequested pages cannot be provided
    // any more because the data base is full -- TODO confirm.
    SAPDB_Bool DataBaseFull( const Data_PageCount   numPagesRequested );

    // Presumably reports whether the running command has been canceled.
    SAPDB_Bool CommandCanceled();

private:

    /*-------------------------------------------------------------------------
    declaration: m_Current
    description: Handle for the primary file to be considered. This is a
                 reference to the object given to the constructor.
    --------------------------------------------------------------------------*/

    tbd_current_tree    &m_Current;

    /*-------------------------------------------------------------------------
    declaration: m_Trans 
    description: Context of the current transaction. This is a reference to
                 the context reached via curr_trans of the current given to
                 the constructor.
    --------------------------------------------------------------------------*/

    tgg00_TransContext  &m_Trans;

    /*-------------------------------------------------------------------------
    declaration: m_TrError
    description: Current error state. This is a reference to trError_gg00 of
                 the transaction context, i.e. no separate copy is kept.
    --------------------------------------------------------------------------*/

    tgg00_BasisError    &m_TrError;

};

/*!*****************************************************************************

   endclass: Data_PrimTreeStatistic

*******************************************************************************/

#endif  /* DATA_PRIM_TREE_STATISTIC_HPP */

