/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**************************** mpeg decoder ***********************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fame.h"
#include "fame_malloc.h"
#include "fame_decoder.h"
#include "fame_decoder_mpeg.h"
#include "table_scale.h"
#if defined(HAS_MMX)
#define arch_enter_state()
#define arch_leave_state() asm("emms")
#include "transpose_mmx.h"
#include "idct_mmx.h"
#include "dequantize_mmx.h"
#include "reconstruct_mmx.h"
#include "pad_int.h" /* TODO */
#include "half_mmx.h"
#else
#define arch_enter_state() 
#define arch_leave_state() 
#include "idct_float.h"
#include "dequantize_float.h"
#include "reconstruct_float.h"
#include "pad_int.h"
#include "half_int.h"
#endif

/* Forward declarations of the file-local decoder callbacks. They are defined */
/* further down in this file and installed into the decoder object by the     */
/* constructor below. Note that the definition of mpeg_init names the two     */
/* quantisation matrix parameters iqtable and niqtable.                       */
static void mpeg_init(fame_decoder_t *decoder,
		      int width,
		      int height,
		      unsigned char *intra_quantisation_table,
		      unsigned char *inter_quantisation_table,
		      unsigned char *intra_dc_y_scale_table,
		      unsigned char *intra_dc_c_scale_table,
		      fame_mismatch_t mismatch_type);
static void mpeg_enter(fame_decoder_t *decoder,
			fame_yuv_t **past_ref,
			fame_yuv_t **new_ref,
			fame_yuv_t **future_ref,
			fame_yuv_t *yuv,
			unsigned char *shape);
static void mpeg_reconstruct_intra_mb(fame_decoder_t *decoder,
				      short x,
				      short y,
				      short *blocks[6],
				      unsigned char q,
				      fame_bab_t bab_type);
static void mpeg_reconstruct_inter_mb(fame_decoder_t *decoder,
				      short x,
				      short y,
				      short *blocks[6],
				      fame_motion_vector_t *forward,
				      fame_motion_vector_t *backward,
				      fame_motion_coding_t motion_coding,
				      unsigned char q,
				      fame_bab_t bab_type);
static void mpeg_pad(fame_decoder_t *decoder,
		     unsigned char *bab_map,
		     fame_box_t *box);
static void mpeg_interpolate(fame_decoder_t *decoder, int rounding);
static void mpeg_leave(fame_decoder_t *decoder);
static void mpeg_close(fame_decoder_t *decoder);

/* Constructor for fame_decoder_mpeg_t objects: sets the object name and      */
/* points every decoder callback at the MPEG implementation defined in this   */
/* file, then returns the object.                                             */
/* NOTE(review): the signature and the `this` identifier come from the        */
/* FAME_CONSTRUCTOR macro, which is defined outside this file.                */
FAME_CONSTRUCTOR(fame_decoder_mpeg_t)
{
  FAME_OBJECT(this)->name = "MPEG decoder";
  /* install the callbacks */
  FAME_DECODER(this)->init = mpeg_init;
  FAME_DECODER(this)->enter = mpeg_enter;
  FAME_DECODER(this)->reconstruct_intra_mb = mpeg_reconstruct_intra_mb;
  FAME_DECODER(this)->reconstruct_inter_mb = mpeg_reconstruct_inter_mb;
  FAME_DECODER(this)->pad = mpeg_pad;
  FAME_DECODER(this)->interpolate = mpeg_interpolate;
  FAME_DECODER(this)->leave = mpeg_leave;
  FAME_DECODER(this)->close = mpeg_close;
  return(this);
}

/*  mpeg_init                                                                */
/*                                                                           */
/*  Description:                                                             */
/*    Record the frame geometry and mismatch mode, then precompute the       */
/*    dequantisation matrices for quantizer scales 1 to 31 and the prescale  */
/*    matrix. The matrices for scale 0 are left untouched. With HAS_MMX      */
/*    and global mismatch control, six zero-filled mismatch accumulators     */
/*    of (width/8)*(height/8) coefficients each are allocated.               */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder to initialize                     */
/*    int width: width of the frame                                          */
/*    int height: height of the frame                                        */
/*    unsigned char *iqtable: quantisation matrix for intra blocks           */
/*    unsigned char *niqtable: quantisation matrix for inter blocks          */
/*    unsigned char *intra_dc_y_scale_table: quantisation table for DC of Y  */
/*    unsigned char *intra_dc_c_scale_table: quantisation table for DC of C  */
/*    fame_mismatch_t mismatch_type: type of mismatch control                */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_init(fame_decoder_t *decoder,
		      int width,
		      int height,
		      unsigned char *iqtable,
		      unsigned char *niqtable,
		      unsigned char *intra_dc_y_scale_table,
		      unsigned char *intra_dc_c_scale_table,
		      fame_mismatch_t mismatch_type)
{
  fame_decoder_mpeg_t *decoder_mpeg = FAME_DECODER_MPEG(decoder);
  int i, q;

  /* set width, height and mismatch control mode */
  decoder_mpeg->width = width;
  decoder_mpeg->height = height;
  decoder_mpeg->mismatch = mismatch_type;

#ifdef HAS_MMX
  /* one zeroed accumulator per block of a macroblock (4 Y, Cb, Cr) */
  if(mismatch_type == fame_mismatch_global) {
    /* size computed once, in size_t, instead of twice in int arithmetic */
    size_t size = (size_t) (decoder_mpeg->width >> 3) *
      (size_t) (decoder_mpeg->height >> 3) * sizeof(dct_t);

    for(i = 0; i < 6; i++) {
      /* NOTE(review): the result of fame_malloc is not checked, as before */
      decoder_mpeg->mismatch_accumulator[i] = (dct_t *) fame_malloc(size);
      memset(decoder_mpeg->mismatch_accumulator[i], 0, size);
    }
  }

  /* the prescale conversion below uses floating point: leave MMX state */
  asm("emms");
#endif

  /* the prescale matrix does not depend on the quantizer scale, so it is */
  /* filled once here rather than on every iteration of the loop below    */
  for(i = 0; i < 64; i++) {
#ifdef HAS_MMX
    /* scale by 2^16, round to nearest, narrow to short */
    decoder_mpeg->psmatrix[i] =
      (short) ((double)(1UL << 16) * prescale[i] + 0.5);
#else
    decoder_mpeg->psmatrix[i] = prescale[i];
#endif
  }

  /* compute the dequantisation matrices for every quantizer scale */
  for(q = 1; q < 32; q++) {
    /* intra DC scaler (the MMX variant stores it multiplied by 8) */
#ifdef HAS_MMX
    decoder_mpeg->yidqmatrixes[q][0] =
      (short) (intra_dc_y_scale_table[q] << 3);
    decoder_mpeg->cidqmatrixes[q][0] =
      (short) (intra_dc_c_scale_table[q] << 3);
#else
    decoder_mpeg->yidqmatrixes[q][0] = intra_dc_y_scale_table[q];
    decoder_mpeg->cidqmatrixes[q][0] = intra_dc_c_scale_table[q];
#endif

    /* intra AC coefficients: luma and chroma share the same matrix */
    for(i = 1; i < 64; i++)
      decoder_mpeg->yidqmatrixes[q][i] = decoder_mpeg->cidqmatrixes[q][i] =
	q * iqtable[i];

    /* inter coefficients (DC included) */
    for(i = 0; i < 64; i++)
      decoder_mpeg->nidqmatrixes[q][i] = q * niqtable[i];
  }
}

/*  mpeg_enter                                                               */
/*                                                                           */
/*  Description:                                                             */
/*    Begin a new picture by storing the frame and shape pointers.           */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder                                   */
/*    fame_yuv_t **past_ref: past reference images                           */
/*    fame_yuv_t **new_ref: new reconstructed reference images               */
/*    fame_yuv_t **future_ref: future reference images                       */
/*    fame_yuv_t *yuv: source image                                          */
/*    unsigned char *shape: shape binary mask                                */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_enter(fame_decoder_t *decoder,
			fame_yuv_t **past_ref,
			fame_yuv_t **new_ref,
			fame_yuv_t **future_ref,
			fame_yuv_t *yuv,
			unsigned char *shape)
{
  fame_decoder_mpeg_t *mpeg = FAME_DECODER_MPEG(decoder);

  /* reference frames: past, future, and the one being rebuilt */
  mpeg->past_ref = past_ref;
  mpeg->future_ref = future_ref;
  mpeg->new_ref = new_ref;

  /* source picture and its shape mask (mpeg_pad tests it against NULL) */
  mpeg->input = yuv;
  mpeg->shape = shape;

  arch_enter_state();
}

/*  mpeg_pad_mb                                                              */
/*                                                                           */
/*  Description:                                                             */
/*    Repetitively pad the Y, U and V blocks of one border macroblock of     */
/*    the new reference frame, guided by the shape mask.                     */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder                                   */
/*    short x: the x location of the macroblock in macroblock units          */
/*    short y: the y location of the macroblock in macroblock units          */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_pad_mb(fame_decoder_t *decoder,
			short x,
			short y)
{
  fame_decoder_mpeg_t *mpeg = FAME_DECODER_MPEG(decoder);
  /* the shape mask is addressed with the pitch of the input picture */
  int mask_pitch = mpeg->input->p;
  unsigned char *mask = mpeg->shape + (y << 4) * mask_pitch + (x << 4);
  /* the planes to pad belong to the first new reference frame */
  int ref_pitch = mpeg->new_ref[0]->p;
  unsigned char *luma, *cb, *cr;

  /* 16x16 luma block, 8x8 chroma blocks on half-pitch planes */
  luma = mpeg->new_ref[0]->y + (y << 4) * ref_pitch + (x << 4);
  cb = mpeg->new_ref[0]->u + (y << 3) * (ref_pitch >> 1) + (x << 3);
  cr = mpeg->new_ref[0]->v + (y << 3) * (ref_pitch >> 1) + (x << 3);

  repetitive_fill_Y(luma, mask, ref_pitch, mask_pitch);
  repetitive_fill_C(cb, mask, ref_pitch, mask_pitch);
  repetitive_fill_C(cr, mask, ref_pitch, mask_pitch);
}

/*  mpeg_reconstruct_intra_mb                                                */
/*                                                                           */
/*  Description:                                                             */
/*    Dequantize, inverse transform and store the six blocks of an intra     */
/*    macroblock into the new reference frame. The macroblock is padded      */
/*    when bab_type >= bab_border_16x16.                                     */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder                                   */
/*    short x: the x location of the macroblock in macroblock units          */
/*    short y: the y location of the macroblock in macroblock units          */
/*    short *blocks[6]:  the DCT coded blocks                                */
/*    unsigned char q: the quantizer scale for this block                    */
/*    fame_bab_t bab_type: binary alpha block type                           */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_reconstruct_intra_mb(fame_decoder_t *decoder,
				      short x,
				      short y,
				      short *blocks[6],
				      unsigned char q,
				      fame_bab_t bab_type)
{
  fame_decoder_mpeg_t *mpeg = FAME_DECODER_MPEG(decoder);
  void (* dequantize)(short *, dct_t *, dct_t *, dct_t *, dct_t *);
  fame_yuv_t *frame;
  unsigned long offset[6];
  int pitch, acc, b;

  frame = mpeg->new_ref[0];
  pitch = frame->p;

  /* Offsets of the six blocks inside their planes */
  offset[0] = (y << 4) * pitch + (x << 4);                     /* Y(0,0) */
  offset[1] = offset[0] + 8;                                   /* Y(0,1) */
  offset[2] = offset[0] + (pitch << 3);                        /* Y(1,0) */
  offset[3] = offset[2] + 8;                                   /* Y(1,1) */
  offset[4] = offset[5] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb, Cr */

  /* Position of this macroblock in each mismatch accumulator.            */
  /* NOTE(review): mpeg_init only allocates the accumulators when HAS_MMX */
  /* is defined and global mismatch control is selected; otherwise only   */
  /* an address is formed below. Presumably the dequantizers ignore it in */
  /* those configurations -- confirm in the dequantize_* headers.         */
  acc = y*(pitch>>3)+x;

  /* Global mismatch control unless local control was requested */
  dequantize = dequantize_intra_global;
  if(mpeg->mismatch == fame_mismatch_local)
    dequantize = dequantize_intra_local;

  /* Reconstruct the blocks in order: four Y, then U, then V */
  for(b = 0; b < 6; b++) {
    unsigned char *plane;
    dct_t *matrix;
    int stride;

    if(b < 4) {
      plane = frame->y;
      matrix = mpeg->yidqmatrixes[q];
      stride = pitch;
    } else {
      /* chroma planes use their own matrix and half the luma pitch */
      plane = (b == 4) ? frame->u : frame->v;
      matrix = mpeg->cidqmatrixes[q];
      stride = pitch >> 1;
    }

    dequantize(blocks[b],
	       mpeg->tmpblock,
	       matrix,
	       mpeg->psmatrix,
	       &mpeg->mismatch_accumulator[b][acc]);
    idct(mpeg->tmpblock);
    reconstruct(plane + offset[b], mpeg->tmpblock, stride);
  }

  /* fill the block if needed */
  if(bab_type >= bab_border_16x16)
    mpeg_pad_mb(decoder, x, y);
}

/*  mpeg_reconstruct_inter_mb                                                */
/*                                                                           */
/*  Description:                                                             */
/*    Rebuild an inter macroblock in the new reference frame from the        */
/*    motion-compensated prediction plus, for coded blocks, the decoded      */
/*    residual. The macroblock is padded when bab_type is at least           */
/*    bab_border_16x16.                                                      */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder                                   */
/*    short x: the x location of the macroblock in macroblock units          */
/*    short y: the y location of the macroblock in macroblock units          */
/*    short *blocks[6]:  the DCT coded blocks                                */
/*    fame_motion_vector_t *forward: forward motion vectors                  */
/*    fame_motion_vector_t *backward: backward motion vectors (unused)       */
/*    fame_motion_coding_t motion_coding: motion coding mode (unused)        */
/*    unsigned char q: the quantizer scale for this block                    */
/*    fame_bab_t bab_type: binary alpha block type                           */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_reconstruct_inter_mb(fame_decoder_t *decoder,
				      short x,
				      short y,
				      short *blocks[6],
				      fame_motion_vector_t *forward,
				      fame_motion_vector_t *backward,
				      fame_motion_coding_t motion_coding,
				      unsigned char q,
				      fame_bab_t bab_type)
{
  fame_decoder_mpeg_t *mpeg = FAME_DECODER_MPEG(decoder);
  void (* dequantize)(short *, dct_t *, dct_t *, dct_t *, dct_t *);
  fame_yuv_t *cur;
  unsigned long offset[6];
  signed long motion[6];
  signed long halfpel[6];
  int coded[6];
  int pitch, acc, b, k;

  cur = mpeg->new_ref[0];
  pitch = cur->p;

  /* Offsets of the six blocks inside their planes */
  offset[0] = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
  offset[1] = offset[0] + 8;                       /* Y(0,1) */
  offset[2] = offset[0] + (pitch << 3);            /* Y(1,0) */
  offset[3] = offset[2] + 8;                       /* Y(1,1) */
  offset[4] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb     */
  offset[5] = offset[4];                           /* Cr     */

  /* Position of this macroblock in each mismatch accumulator */
  acc = y*(pitch>>3)+x;

  for(b = 0; b < 6; b++) {
    /* chroma planes have half the luma pitch */
    int stride = (b < 4) ? pitch : (pitch >> 1);

    /* vectors are half-pixel coded: split into a full-pel displacement */
    /* and a two-bit half-pel phase that selects the reference frame    */
    motion[b] = (forward[b].dy >> 1) * stride + (forward[b].dx >> 1);
    halfpel[b] = ((forward[b].dy & 1) << 1) | (forward[b].dx & 1);

    /* a block is coded when it is present and has a nonzero coefficient */
    coded[b] = 0;
    if(blocks[b] != NULL)
      for(k = 0; k < 64; k++)
	coded[b] |= blocks[b][k];
  }

  /* Global mismatch control unless local control was requested */
  dequantize = dequantize_inter_global;
  if(mpeg->mismatch == fame_mismatch_local)
    dequantize = dequantize_inter_local;

  /* Reconstruct the blocks in order: four Y, then U, then V */
  for(b = 0; b < 6; b++) {
    fame_yuv_t *ref = mpeg->future_ref[halfpel[b]];
    unsigned char *dst, *src;
    int stride;

    if(b < 4) {
      dst = cur->y;
      src = ref->y;
      stride = pitch;
    } else if(b == 4) {
      dst = cur->u;
      src = ref->u;
      stride = pitch >> 1;
    } else {
      dst = cur->v;
      src = ref->v;
      stride = pitch >> 1;
    }
    dst = dst + offset[b];
    src = src + offset[b] + motion[b];

    if(!coded[b]) {
      /* nothing to add: copy the prediction and report a zero error */
      move(dst, src, stride);
      forward[b].error = 0;
      continue;
    }

    /* prediction plus decoded residual; sum() also gets &error */
    dequantize(blocks[b],
	       mpeg->tmpblock,
	       mpeg->nidqmatrixes[q],
	       mpeg->psmatrix,
	       &mpeg->mismatch_accumulator[b][acc]);
    idct(mpeg->tmpblock);
    sum(dst, src, &forward[b].error, mpeg->tmpblock, stride);
  }

  /* fill the block if needed */
  if(bab_type >= bab_border_16x16)
    mpeg_pad_mb(decoder, x, y);
}

/*  mpeg_pad                                                                 */
/*                                                                           */
/*  Description:                                                             */
/*    Run extended padding on the new reference frames, with or without      */
/*    the shape mask depending on whether one was supplied.                  */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder                                   */
/*    unsigned char *bab_map: binary alpha block type map                    */
/*    fame_box_t *box: bounding box                                          */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_pad(fame_decoder_t *decoder,
		     unsigned char *bab_map,
		     fame_box_t *box)
{
  fame_decoder_mpeg_t *mpeg = FAME_DECODER_MPEG(decoder);
  void (* pad_frame)(int,
		     int,
		     int,
		     fame_yuv_t **,
		     unsigned char *,
		     unsigned char *,
		     fame_box_t *);
  int ref_index;

  /* mask-less padding unless a shape mask was given to mpeg_enter */
  pad_frame = extended_pad_withoutmask;
  if(mpeg->shape)
    pad_frame = extended_pad_withmask;

  /* NOTE(review): indices 0..3 presumably select the full-pel frame and */
  /* its three half-pel interpolations -- confirm in the pad_* header    */
  ref_index = 0;
  while(ref_index < 4) {
    pad_frame(ref_index,
	      mpeg->width,
	      mpeg->height,
	      mpeg->new_ref,
	      mpeg->shape,
	      bab_map,
	      box);
    ref_index++;
  }
}


/*  mpeg_interpolate                                                         */
/*                                                                           */
/*  Description:                                                             */
/*    Build the half-pel interpolated frames of the new reference.           */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder                                   */
/*    int rounding: rounding control passed to half_interpolate              */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_interpolate(fame_decoder_t *decoder, int rounding)
{
  fame_decoder_mpeg_t *mpeg = FAME_DECODER_MPEG(decoder);
  fame_yuv_t **frames = mpeg->new_ref;

  /* all the work is delegated to the architecture-specific helper */
  half_interpolate(mpeg->width, mpeg->height, frames, rounding);
}

/*  mpeg_leave                                                               */
/*                                                                           */
/*  Description:                                                             */
/*    End the processing of a picture; with HAS_MMX this executes emms.      */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder (unused)                          */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_leave(fame_decoder_t *decoder)
{
  /* nothing is read from the decoder here */
  (void) decoder;

  arch_leave_state();
}

/*  mpeg_close                                                               */
/*                                                                           */
/*  Description:                                                             */
/*    Release the decoder: with HAS_MMX and global mismatch control, free    */
/*    the mismatch accumulators and clear their pointers.                    */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_decoder_t *decoder: the decoder                                   */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void mpeg_close(fame_decoder_t *decoder)
{
  fame_decoder_mpeg_t *decoder_mpeg = FAME_DECODER_MPEG(decoder);
#ifdef HAS_MMX
  int i;

  /* free the mismatch accumulators allocated by mpeg_init */
  if(decoder_mpeg->mismatch == fame_mismatch_global)
    for(i = 0; i < 6; i++) {
      if(decoder_mpeg->mismatch_accumulator[i] != NULL) {
	fame_free(decoder_mpeg->mismatch_accumulator[i]);
	/* do not keep a dangling pointer: a second close must not */
	/* free the same buffer again                              */
	decoder_mpeg->mismatch_accumulator[i] = NULL;
      }
    }
#else
  /* nothing was allocated by mpeg_init in this configuration */
  (void) decoder_mpeg;
#endif
}
