/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stdio.h>
#include <stdlib.h>
#include "fame.h"
#include "fame_motion.h"
#include "fame_motion_fourstep.h"
#include "mad_int.h"

/* Forward declarations of the file-local routines defined further down.   */

/* Estimation callback: estimates motion for one macroblock and returns    */
/* the selected coding mode.                                               */
static fame_motion_coding_t fourstep_estimation(fame_motion_t *motion,
						int mb_x,
						int mb_y,
						fame_motion_vector_t *vectors,
						unsigned char quant);
/* Integer-sample step search using one common vector for the four blocks  */
/* addressed by offset[]/edged_offset[].                                   */
static void find_vector(fame_yuv_t **ref,
			unsigned char *current,
			unsigned char *shape,
			int offset[4],
			int edged_offset[4],
			int x,
			int y,
			int width,
			int height,
			int pitch,
			int search_range,
			int step_count,
			compute_error_t eval_error,
			fame_motion_vector_t *mv,
			int unrestricted);

/* Integer-sample step search for a single block (one offset, one vector). */
static void find_subvector(fame_yuv_t **ref,
			   unsigned char *current,
			   unsigned char *shape,
			   int offset,
			   int edged_offset,
			   int x,
			   int y,
			   int width,
			   int height,
			   int pitch,
			   int search_range,
			   int step_count,
			   compute_error_t eval_error,
			   fame_motion_vector_t *mv,
			   int unrestricted);

/* Half-sample refinement of the common vector of the four blocks.         */
static void find_half_vector(fame_yuv_t **ref,
			     unsigned char *current,
			     unsigned char *shape,
			     int offset[4],
			     int edged_offset[4],
			     int x,
			     int y,
			     int width,
			     int height,
			     int pitch,
			     compute_error_t eval_error,
			     fame_motion_vector_t *mv,
			     int unrestricted);

/* Half-sample refinement of the vector of a single block.                 */
static void find_half_subvector(fame_yuv_t **ref,
				unsigned char *current,
				unsigned char *shape,
				int offset,
				int edged_offset,
				int x,
				int y,
				int width,
				int height,
				int pitch,
				compute_error_t eval_error,
				fame_motion_vector_t *mv,
				int unrestricted);

/* Constructor of the four-step motion estimator object: runs the generic  */
/* fame_motion_t constructor on the object, then sets its name and         */
/* installs fourstep_estimation as the estimation callback.                */
/* NOTE(review): FAME_CONSTRUCTOR is a project macro that presumably       */
/* declares `this` and the return type -- confirm against its definition.  */
FAME_CONSTRUCTOR(fame_motion_fourstep_t)
{
  fame_motion_t_constructor(FAME_MOTION(this));
  FAME_OBJECT(this)->name = "four-step motion estimation";
  FAME_MOTION(this)->estimation = fourstep_estimation;
  return(this);
}

/* Seed for the "best candidate so far" error: the search routines start   */
/* each block's running best at INFINITE_ERROR / 4.                        */
#define INFINITE_ERROR 65536

/* Directions :
 * 5 4 3
 * 6 1 2
 * 7 8 9
 *
 * The numbers are the index_direction values stored in the td* tables
 * that follow; 1 is the null displacement.  As those tables define them,
 * the columns of the diagram are dx = -1, 0, +1 (left to right) and its
 * rows are dy = +1, 0, -1 (top to bottom).
 */
/* Direction number meaning "no displacement"; the search routines also    */
/* use it to mark a step that brought no improvement.                      */
#define NULL_MOTION 1

/* One candidate displacement of the search pattern. */
typedef struct direction
{
  int dx;               /* horizontal unit displacement: -1, 0 or +1      */
  int dy;               /* vertical unit displacement: -1, 0 or +1        */
  int index_direction;  /* direction number, 1..9 (see the diagram above) */
} direction_t;

/* A list of candidate displacements together with its length. */
typedef struct tab_direction
{
  int number;               /* number of entries in directions[] */
  direction_t *directions;  /* the candidates, in test order     */
} tab_direction_t;

/* td0: the centre and its eight neighbours. */
static direction_t td0[] = {
  {  0,  0, 1 }, {  1,  0, 2 }, {  1,  1, 3 },
  {  0,  1, 4 }, { -1,  1, 5 }, { -1,  0, 6 },
  { -1, -1, 7 }, {  0, -1, 8 }, {  1, -1, 9 }
};

/* td1: the eight neighbours without the centre. */
static direction_t td1[] = {
  {  1,  0, 2 }, {  1,  1, 3 }, {  0,  1, 4 }, { -1,  1, 5 },
  { -1,  0, 6 }, { -1, -1, 7 }, {  0, -1, 8 }, {  1, -1, 9 }
};

/* td2..td9: candidates tested after the previous move went in direction  */
/* 2..9.  Each list holds the neighbours that share a non-zero dx or dy   */
/* component with that move: three after an axis move, five after a       */
/* diagonal one.                                                          */
static direction_t td2[] = {
  {  1,  0, 2 }, {  1,  1, 3 }, {  1, -1, 9 }
};

static direction_t td3[] = {
  {  1,  0, 2 }, {  1,  1, 3 }, {  0,  1, 4 }, { -1,  1, 5 }, {  1, -1, 9 }
};

static direction_t td4[] = {
  {  1,  1, 3 }, {  0,  1, 4 }, { -1,  1, 5 }
};

static direction_t td5[] = {
  {  1,  1, 3 }, {  0,  1, 4 }, { -1,  1, 5 }, { -1,  0, 6 }, { -1, -1, 7 }
};

static direction_t td6[] = {
  { -1,  1, 5 }, { -1,  0, 6 }, { -1, -1, 7 }
};

static direction_t td7[] = {
  { -1,  1, 5 }, { -1,  0, 6 }, { -1, -1, 7 }, {  0, -1, 8 }, {  1, -1, 9 }
};

static direction_t td8[] = {
  { -1, -1, 7 }, {  0, -1, 8 }, {  1, -1, 9 }
};

static direction_t td9[] = {
  {  1,  0, 2 }, {  1,  1, 3 }, { -1, -1, 7 }, {  0, -1, 8 }, {  1, -1, 9 }
};

/* Pairs a table with its element count so the two cannot drift apart. */
#define TD_ENTRY(tab) { (int) (sizeof(tab) / sizeof((tab)[0])), (tab) }

/* td[k] is the candidate list tdk; the search routines index it with a   */
/* direction number, or with 0 / 1 for the full patterns.                 */
static tab_direction_t td[10] =
{
  TD_ENTRY(td0),
  TD_ENTRY(td1),
  TD_ENTRY(td2),
  TD_ENTRY(td3),
  TD_ENTRY(td4),
  TD_ENTRY(td5),
  TD_ENTRY(td6),
  TD_ENTRY(td7),
  TD_ENTRY(td8),
  TD_ENTRY(td9)
};

#undef TD_ENTRY

/*  fourstep_estimation
 *
 *  Description:
 *    Estimate the motion of one macroblock with a step search on its four
 *    8x8 luminance blocks, refine the result to half-sample precision and
 *    choose between intra and inter coding.
 *
 *  Arguments:
 *    fame_motion_t *motion: the motion estimation
 *    int mb_x: x coordinate of the macroblock in macroblock unit
 *    int mb_y: y coordinate of the macroblock in macroblock unit
 *    fame_motion_vector_t *vectors: four entries, one per 8x8 block.
 *      On entry vectors[0].dx/dy hold the predicted vector in half-sample
 *      units.  On return dx, dy, error, deviation and count of all four
 *      entries have been written.
 *    unsigned char quant: not referenced by this estimator
 *
 *  Notes:
 *    When FAME_MOTION_BLOCK_SEARCH is set in motion->flags, each block is
 *    additionally searched on its own and the four independent vectors
 *    replace the common one if their total error is lower by more than
 *    (count >> 1) + 1.
 *
 *  Return value:
 *    fame_motion_coding_t: motion_intra or motion_inter.
 */
static fame_motion_coding_t fourstep_estimation(fame_motion_t *motion,
                                                int mb_x,
                                                int mb_y,
                                                fame_motion_vector_t *vectors,
                                                unsigned char quant)
{
  int pitch;
  unsigned char *current;
  unsigned char *shape;
  int unrestricted;
  int block_search;
  int x, y, width, height;
  int offset[4];
  int edged_offset[4];
  int count;
  int sad_inter, sad_inter4v, mad_inter;
  fame_motion_vector_t subvectors[4];
  int k;

  x = mb_x << 4;
  y = mb_y << 4;
  width = motion->mb_width << 4;
  height = motion->mb_height << 4;
  current = motion->current->y;
  pitch = motion->current->p;
  shape = motion->shape;
  unrestricted = (motion->flags & FAME_MOTION_UNRESTRICTED_SEARCH)?1:0;
  block_search = (motion->flags & FAME_MOTION_BLOCK_SEARCH)?1:0;

  /* saturate prediction to borders; vector components are in half-sample */
  /* units, hence the factor 2.  Negative bounds are multiplied instead   */
  /* of left-shifted: shifting a negative value left is undefined in C.   */
  if(unrestricted) {
    if((x<<1)+vectors[0].dx < -32) vectors[0].dx = (-x-16)*2;
    if((y<<1)+vectors[0].dy < -32) vectors[0].dy = (-y-16)*2;
    if((x<<1)+vectors[0].dx>(width<<1)) vectors[0].dx = (width-x)<<1;
    if((y<<1)+vectors[0].dy>(height<<1)) vectors[0].dy = (height-y)<<1;
  } else {
    if((x<<1)+vectors[0].dx<0) vectors[0].dx = (-x)*2;
    if((y<<1)+vectors[0].dy<0) vectors[0].dy = (-y)*2;
    if((x<<1)+vectors[0].dx>((width-16)<<1)) vectors[0].dx = (width-16-x)<<1;
    if((y<<1)+vectors[0].dy>((height-16)<<1)) vectors[0].dy = (height-16-y)<<1;
  }

  /* offsets of the four 8x8 blocks: offset[] addresses the current frame */
  /* and shape (line stride pitch), edged_offset[] the reference planes   */
  /* (line stride pitch+32)                                               */
  offset[0] = y * pitch + x;
  offset[1] = y * pitch + x+8;
  offset[2] = (y+8) * pitch + x;
  offset[3] = (y+8) * pitch + x+8;
  edged_offset[0] = y * (pitch+32) + x;
  edged_offset[1] = y * (pitch+32) + x+8;
  edged_offset[2] = (y+8) * (pitch+32) + x;
  edged_offset[3] = (y+8) * (pitch+32) + x+8;

  /* compute zero motion MAD and number of pixels in shape */
  for(k = 0; k < 4; k++) {
    if(motion->shape)
      vectors[k].count = mad_withmask(current+offset[k], shape+offset[k],
                                      pitch, &vectors[k].deviation);
    else
      vectors[k].count = mad_withoutmask(current+offset[k],
                                         pitch, &vectors[k].deviation);
  }

  /* integer sample 4-step search */
  find_vector(motion->ref, current, shape, offset, edged_offset,
              x, y, width, height, pitch,
              motion->search_range, 4,
              motion->MAE8x8,
              vectors,
              unrestricted);

  if(block_search) {
    /* subvector search, started from the common integer vector */
    for(k = 0; k < 4; k++) { /* TODO: k depends on shape */
      subvectors[k].dx = vectors[k].dx;
      subvectors[k].dy = vectors[k].dy;
      subvectors[k].error = vectors[k].error;
      /* integer sample 2-step search */
      find_subvector(motion->ref, current, shape,
                     offset[k], edged_offset[k],
                     x, y, width, height, pitch,
                     motion->search_range, 2,
                     motion->MAE8x8,
                     &subvectors[k],
                     unrestricted);
      /* half sample search */
      find_half_subvector(motion->ref, current, shape,
                          offset[k], edged_offset[k],
                          x, y, width, height, pitch,
                          motion->MAE8x8,
                          &subvectors[k],
                          unrestricted);
    }
  }

  /* half sample search */
  /* we do this step before intra/inter decision since in our case */
  /* we've already computed subpel planes for all the image and thus */
  /* the overhead cost for estimating subpel vector is relatively small */
  find_half_vector(motion->ref, current, shape, offset, edged_offset,
                   x, y, width, height, pitch,
                   motion->MAE8x8,
                   vectors,
                   unrestricted);

  sad_inter = vectors[0].error + vectors[1].error +
              vectors[2].error + vectors[3].error;
  mad_inter = vectors[0].deviation + vectors[1].deviation +
              vectors[2].deviation + vectors[3].deviation;
  count = vectors[0].count + vectors[1].count + vectors[2].count + vectors[3].count;

  /* inter4v/inter mode decision */
  /* subvectors[] is only filled in by the block search above, so its    */
  /* errors must not be read unless that search actually ran             */
  if(block_search) {
    sad_inter4v = subvectors[0].error + subvectors[1].error +
                  subvectors[2].error + subvectors[3].error; /* TODO: depends on shape */
    if(sad_inter4v + ((count>>1)+1) < sad_inter) {
      /* inter4v prediction */
      sad_inter = sad_inter4v;
      for(k = 0; k < 4; k++) {
        vectors[k].dx = subvectors[k].dx;
        vectors[k].dy = subvectors[k].dy;
        vectors[k].error = subvectors[k].error;
      }
    }
  }

  /* intra/inter mode decision: intra when the zero-motion deviation plus */
  /* twice the pixel count is still below the inter error                 */
  if(mad_inter + count + count < sad_inter)
    return(motion_intra);
  else
    return(motion_inter);
}

/*  find_vector
 *
 *  Description:
 *    Integer sample step search of one common vector for the four blocks
 *    of a macroblock.
 *
 *  Arguments:
 *    fame_yuv_t **ref: reference frames (half-pel), indexed by the
 *      half-sample parity of the vector
 *    unsigned char *current: current frame
 *    unsigned char *shape: current shape
 *    int offset[4]: offsets to the blocks in current/shape
 *    int edged_offset[4]: offsets to the blocks in the reference planes
 *      (line stride pitch+32)
 *    int x: x coordinate of the macroblock in pixel unit
 *    int y: y coordinate of the macroblock in pixel unit
 *    int width: picture width in pixels as passed by the caller
 *    int height: picture height in pixels as passed by the caller
 *    int pitch: number of pixels to the next line
 *    int search_range: maximum motion range in pixels
 *    int step_count: the first step size is 1 << (step_count-1); also the
 *      number of moves allowed before the step is halved regardless
 *    compute_error_t eval_error: error evaluation function
 *    fame_motion_vector_t *mv: four motion vectors.  mv[0].dx/dy is the
 *      start vector and mv[k].count is read; on return all four carry the
 *      same dx/dy and their own error.
 *    int unrestricted: non-zero lets the block leave the picture by up to
 *      16 pixels
 *
 *  Notes:
 *    The search is centered on the predicted vector.
 *    The search is made on the Y blocks only.
 *    The error of the start vector is lowered by count >> 1 per block,
 *    which favours keeping the prediction.
 *
 *  Return value:
 *    None.
 */
static void find_vector(fame_yuv_t **ref,
                        unsigned char *current,
                        unsigned char *shape,
                        int offset[4],
                        int edged_offset[4],
                        int x,
                        int y,
                        int width,
                        int height,
                        int pitch,
                        int search_range,
                        int step_count,
                        compute_error_t eval_error,
                        fame_motion_vector_t *mv,
                        int unrestricted)
{
  int i, k;
  int counter_fourstep;
  int step;
  /* Step sizes, addressed as ptr_step?[-1], [0], [+1] by direction       */
  /* component.  Only the -1 and +1 slots are recomputed in the loop; the */
  /* centre slot is read whenever a component is 0 (and multiplied by     */
  /* that 0), so it is zero-initialized to avoid reading an indeterminate */
  /* value.                                                               */
  int tab_stepx[3] = { 0, 0, 0 };
  int tab_stepy[3] = { 0, 0, 0 };
  int *ptr_stepx;
  int *ptr_stepy;
  int last_motion;
  int motion;
  int residual;
  int displacement;
  tab_direction_t *current_td;
  fame_motion_vector_t rel[4];
  fame_motion_vector_t test[4];

  ptr_stepx = &tab_stepx[1];
  ptr_stepy = &tab_stepy[1];

  last_motion = NULL_MOTION;
  current_td = &(td[last_motion]);
  counter_fourstep = step_count;

  /* rel[] is the best candidate seen so far; it is kept across steps */
  for(k = 0; k < 4; k++)
    rel[k].error = INFINITE_ERROR / 4;
  rel[0].dx = 0;
  rel[0].dy = 0;

  step = 1 << (step_count-1);

  /* integer part of the vector as an offset in the reference plane, and */
  /* its half-sample parity selecting the plane; the parity never        */
  /* changes below because the vector only moves by whole pixels         */
  motion = (mv[0].dx >> 1) + (mv[0].dy >> 1) * (pitch+32);
  residual = (mv[0].dx & 1) + ((mv[0].dy & 1) << 1);

  /* initial step */
  for(k = 0; k < 4; k++)
    mv[k].error =
      eval_error(ref[residual]->y+edged_offset[k]+motion,
                 current+offset[k],
                 shape+offset[k],
                 pitch) - (mv[k].count >> 1);

  if(mv[0].error+mv[1].error+mv[2].error+mv[3].error <= 0) {
    mv[3].dx = mv[2].dx = mv[1].dx = mv[0].dx;
    mv[3].dy = mv[2].dy = mv[1].dy = mv[0].dy;
    return;
  }

  while(step)
  {
    /* clip the step in each direction to the search range and borders */
    if(unrestricted) {
      ptr_stepx[-1] = fame_min(((search_range-1)<<1)+mv[0].dx,fame_min(((x+16)<<1)+mv[0].dx, step<<1))>>1;
      ptr_stepx[+1] = fame_min(((search_range-1)<<1)-mv[0].dx,fame_min(((width-x)<<1)-mv[0].dx, step<<1))>>1;
      ptr_stepy[-1] = fame_min(((search_range-1)<<1)+mv[0].dy,fame_min(((y+16)<<1)+mv[0].dy, step<<1))>>1;
      ptr_stepy[+1] = fame_min(((search_range-1)<<1)-mv[0].dy,fame_min(((height-y)<<1)-mv[0].dy, step<<1))>>1;
    } else {
      ptr_stepx[-1] = fame_min(((search_range-1)<<1)+mv[0].dx,fame_min((x<<1)+mv[0].dx, step<<1))>>1;
      ptr_stepx[+1] = fame_min(((search_range-1)<<1)-mv[0].dx,fame_min(((width-x-16)<<1)-mv[0].dx, step<<1))>>1;
      ptr_stepy[-1] = fame_min(((search_range-1)<<1)+mv[0].dy,fame_min((y<<1)+mv[0].dy, step<<1))>>1;
      ptr_stepy[+1] = fame_min(((search_range-1)<<1)-mv[0].dy,fame_min(((height-y-16)<<1)-mv[0].dy, step<<1))>>1;
    }

    /* update the step: after a step without improvement, or once the   */
    /* move budget is used up, halve the step and test the full pattern */
    if(last_motion == NULL_MOTION || counter_fourstep == 0)
    {
      step >>= 1;
      current_td = &(td[0]);
    }
    else
      counter_fourstep--;

    /* search the best motion vector from the current point */
    for(i = 0; i < current_td->number; i++)
    {
      test[0].dx =
        current_td->directions[i].dx *
        ptr_stepx[current_td->directions[i].dx];
      test[0].dy =
        current_td->directions[i].dy *
        ptr_stepy[current_td->directions[i].dy];
      displacement = test[0].dx+test[0].dy*(pitch+32);

      for(k = 0; k < 4; k++)
        test[k].error =
          eval_error(ref[residual]->y+edged_offset[k]+motion+displacement,
                     current+offset[k],
                     shape+offset[k],
                     pitch);

      if(test[0].error+test[1].error+test[2].error+test[3].error <
         rel[0].error+rel[1].error+rel[2].error+rel[3].error)
      {
        last_motion = current_td->directions[i].index_direction;
        for(k = 0; k < 4; k++)
          rel[k].error = test[k].error;
        rel[0].dx = test[0].dx;
        rel[0].dy = test[0].dy;
      }
    }

    /* Update the motion vector and the location in the window */
    if(rel[0].error+rel[1].error+rel[2].error+rel[3].error <
       mv[0].error+mv[1].error+mv[2].error+mv[3].error)
    {
      for(k = 0; k < 4; k++)
        mv[k].error = rel[k].error;
      /* pixels to half-sample units; multiply rather than shift since */
      /* the displacement may be negative                              */
      mv[0].dx += rel[0].dx * 2;
      mv[0].dy += rel[0].dy * 2;
      current_td = &(td[last_motion]);
      motion = (mv[0].dx >> 1) + (mv[0].dy >> 1) * (pitch+32);
    }
    else
    {
      last_motion = NULL_MOTION;
      current_td = &(td[last_motion]);
    }
  }
  mv[3].dx = mv[2].dx = mv[1].dx = mv[0].dx;
  mv[3].dy = mv[2].dy = mv[1].dy = mv[0].dy;
}

/*  find_subvector
 *
 *  Description:
 *    Integer sample step search for a single block.
 *
 *  Arguments:
 *    fame_yuv_t **ref: reference frames (half-pel), indexed by the
 *      half-sample parity of the vector
 *    unsigned char *current: current frame
 *    unsigned char *shape: current shape
 *    int offset: offset to the block in current/shape
 *    int edged_offset: offset to the block in the reference planes
 *      (line stride pitch+32)
 *    int x, int y: coordinates in pixel unit used for border clipping
 *    int width, int height: picture size in pixels as passed by the caller
 *    int pitch: number of pixels to the next line
 *    int search_range: maximum motion range in pixels
 *    int step_count: the first step size is 1 << (step_count-1); also the
 *      number of moves allowed before the step is halved regardless
 *    compute_error_t eval_error: error evaluation function
 *    fame_motion_vector_t *mv: start vector on entry, best vector and its
 *      error on return
 *    int unrestricted: non-zero lets the block leave the picture by up to
 *      16 pixels
 *
 *  Notes:
 *    NOTE(review): the caller passes the macroblock's x/y for every block
 *    and the clipping uses a size of 16, so the bounds are those of the
 *    macroblock rather than of the 8x8 block -- confirm this is intended.
 *
 *  Return value:
 *    None.
 */
static void find_subvector(fame_yuv_t **ref,
                           unsigned char *current,
                           unsigned char *shape,
                           int offset,
                           int edged_offset,
                           int x,
                           int y,
                           int width,
                           int height,
                           int pitch,
                           int search_range,
                           int step_count,
                           compute_error_t eval_error,
                           fame_motion_vector_t *mv,
                           int unrestricted)
{
  int i;
  int counter_fourstep;
  int step;
  /* Step sizes, addressed as ptr_step?[-1], [0], [+1] by direction       */
  /* component.  Only the -1 and +1 slots are recomputed in the loop; the */
  /* centre slot is read whenever a component is 0 (and multiplied by     */
  /* that 0), so it is zero-initialized to avoid reading an indeterminate */
  /* value.                                                               */
  int tab_stepx[3] = { 0, 0, 0 };
  int tab_stepy[3] = { 0, 0, 0 };
  int *ptr_stepx;
  int *ptr_stepy;
  int last_motion;
  int motion;
  int residual;
  tab_direction_t *current_td;
  fame_motion_vector_t rel;
  fame_motion_vector_t test;

  ptr_stepx = &tab_stepx[1];
  ptr_stepy = &tab_stepy[1];
  last_motion = NULL_MOTION;
  current_td = &(td[last_motion]);
  counter_fourstep = step_count;
  /* rel is the best candidate seen so far; it is kept across steps */
  rel.error = INFINITE_ERROR / 4;
  rel.dx = 0;
  rel.dy = 0;
  step = 1 << (step_count - 1);
  /* integer part of the vector as an offset in the reference plane, and */
  /* its half-sample parity selecting the plane                          */
  motion = (mv->dx >> 1) + (mv->dy >> 1) * (pitch+32);
  residual = (mv->dx & 1) + ((mv->dy & 1) << 1);

  /* initial step: the reference plane is addressed with edged_offset,   */
  /* like every other evaluation; offset is only valid for current/shape */
  mv->error = eval_error(ref[residual]->y+edged_offset+motion,
                         current+offset,
                         shape+offset,
                         pitch);

  if(mv->error <= 0) return;

  while(step)
  {
    /* clip the step in each direction to the search range and borders */
    if(unrestricted) {
      ptr_stepx[-1] = fame_min(((search_range-1)<<1)+mv->dx,fame_min(((x+16)<<1)+mv->dx, step<<1))>>1;
      ptr_stepx[1]  = fame_min(((search_range-1)<<1)-mv->dx,fame_min(((width-x)<<1)-mv->dx, step<<1))>>1;
      ptr_stepy[-1] = fame_min(((search_range-1)<<1)+mv->dy,fame_min(((y+16)<<1)+mv->dy, step<<1))>>1;
      ptr_stepy[1]  = fame_min(((search_range-1)<<1)-mv->dy,fame_min(((height-y)<<1)-mv->dy, step<<1))>>1;
    } else {
      ptr_stepx[-1] = fame_min(((search_range-1)<<1)+mv->dx,fame_min((x<<1)+mv->dx, step<<1))>>1;
      ptr_stepx[1]  = fame_min(((search_range-1)<<1)-mv->dx,fame_min(((width-x-16)<<1)-mv->dx, step<<1))>>1;
      ptr_stepy[-1] = fame_min(((search_range-1)<<1)+mv->dy,fame_min((y<<1)+mv->dy, step<<1))>>1;
      ptr_stepy[1]  = fame_min(((search_range-1)<<1)-mv->dy,fame_min(((height-y-16)<<1)-mv->dy, step<<1))>>1;
    }

    /* update the step: after a step without improvement, or once the  */
    /* move budget is used up, halve the step and test all neighbours  */
    if(last_motion==NULL_MOTION || counter_fourstep==0)
    {
      step >>= 1;
      current_td = &(td[1]);
    }
    else
      counter_fourstep--;

    /* search the best motion vector from the current point */
    for(i = 0; i < current_td->number; i++)
    {
      test.dx =
        current_td->directions[i].dx *
        ptr_stepx[current_td->directions[i].dx];
      test.dy =
        current_td->directions[i].dy *
        ptr_stepy[current_td->directions[i].dy];

      test.error =
        eval_error(ref[residual]->y+edged_offset+motion+
                   (test.dx+test.dy*(pitch+32)),
                   current+offset,
                   shape+offset,
                   pitch);

      if(test.error < rel.error)
      {
        last_motion = current_td->directions[i].index_direction;
        rel.error = test.error;
        rel.dx = test.dx;
        rel.dy = test.dy;
      }
    }

    /* Update the motion vector and the location in the window */
    if(rel.error < mv->error)
    {
      mv->error = rel.error;
      /* pixels to half-sample units; multiply rather than shift since */
      /* the displacement may be negative                              */
      mv->dx += rel.dx * 2;
      mv->dy += rel.dy * 2;
      current_td = &(td[last_motion]);
      motion = (mv->dx >> 1) + (mv->dy >> 1) * (pitch+32);
    }
    else
    {
      last_motion = NULL_MOTION;
      current_td = &(td[last_motion]);
    }
  }
}

/*  find_half_vector
 *
 *  Description:
 *    Refine the common integer vector of the four blocks to half-sample
 *    precision by testing its eight half-sample neighbours.
 *
 *  Arguments:
 *    fame_yuv_t **ref: reference frames (half-pel), indexed by the
 *      half-sample parity of the candidate vector
 *    unsigned char *current: current frame
 *    unsigned char *shape: current shape
 *    int offset[4]: offsets to the blocks in current/shape
 *    int edged_offset[4]: offsets to the blocks in the reference planes
 *    int x: x coordinate of the macroblock in pixel unit
 *    int y: y coordinate of the macroblock in pixel unit
 *    int width: picture width in pixels as passed by the caller
 *    int height: picture height in pixels as passed by the caller
 *    int pitch: number of pixels to the next line
 *    compute_error_t eval: error evaluation function
 *    fame_motion_vector_t *mv: four motion vectors, updated in place
 *    int unrestricted: non-zero widens the allowed area by 16 pixels
 *
 *  Notes:
 *    The search is centered on the integer vector.
 *    The search is made on the Y blocks only.
 *    A neighbour replaces the current best only if the error summed over
 *    the four blocks is strictly lower, so on ties the earlier candidate
 *    wins.  The same half-sample shift is applied to all four vectors.
 *
 *  Return value:
 *    None.
 */
static void find_half_vector(fame_yuv_t **ref,
                             unsigned char *current,
                             unsigned char *shape,
                             int offset[4],
                             int edged_offset[4],
                             int x,
                             int y,
                             int width,
                             int height,
                             int pitch,
                             compute_error_t eval,
                             fame_motion_vector_t *mv,
                             int unrestricted)
{
  /* half-sample neighbours, in the order in which they are examined */
  static const int half_dx[8] = { -1,  0, +1, -1, +1, -1,  0, +1 };
  static const int half_dy[8] = { -1, -1, -1,  0,  0, +1, +1, +1 };
  int cand_err[4], best_err[4];
  int shift_x, shift_y;
  int neg_x_ok, pos_x_ok, neg_y_ok, pos_y_ok;
  int margin;
  int n, k;

  shift_x = 0;
  shift_y = 0;
  for(k = 0; k < 4; k++)
    best_err[k] = mv[k].error;
  margin = unrestricted << 4;

  /* which half-sample moves stay inside the allowed area */
  neg_y_ok = ((y+margin) << 1) + mv[0].dy > 0;
  neg_x_ok = ((x+margin) << 1) + mv[0].dx > 0;
  pos_x_ok = ((x-margin) << 1) + mv[0].dx < ((width-16) << 1);
  pos_y_ok = ((y-margin) << 1) + mv[0].dy < ((height-16) << 1);

  for(n = 0; n < 8; n++) {
    int hx = half_dx[n];
    int hy = half_dy[n];

    if(hx < 0 && !neg_x_ok) continue;
    if(hx > 0 && !pos_x_ok) continue;
    if(hy < 0 && !neg_y_ok) continue;
    if(hy > 0 && !pos_y_ok) continue;

    for(k = 0; k < 4; k++) {
      int cx = mv[k].dx + hx;
      int cy = mv[k].dy + hy;
      /* integer part -> offset in the plane, parity -> which plane */
      int where = (cx >> 1) + (cy >> 1) * (pitch+32);
      int plane = (cx & 1) + ((cy & 1) << 1);

      cand_err[k] = eval(ref[plane]->y+edged_offset[k]+where,
                         current + offset[k],
                         shape + offset[k],
                         pitch);
    }

    if(cand_err[0] + cand_err[1] + cand_err[2] + cand_err[3] <
       best_err[0] + best_err[1] + best_err[2] + best_err[3]) {
      for(k = 0; k < 4; k++)
        best_err[k] = cand_err[k];
      shift_x = hx;
      shift_y = hy;
    }
  }

  for(k = 0; k < 4; k++) {
    mv[k].dx += shift_x;
    mv[k].dy += shift_y;
    mv[k].error = best_err[k];
  }
}

/*  find_half_subvector
 *
 *  Description:
 *    Refine the integer vector of a single block to half-sample precision
 *    by testing its eight half-sample neighbours.
 *
 *  Arguments:
 *    fame_yuv_t **ref: reference frames (half-pel), indexed by the
 *      half-sample parity of the candidate vector
 *    unsigned char *current: current frame
 *    unsigned char *shape: current shape
 *    int offset: offset to the block in current/shape
 *    int edged_offset: offset to the block in the reference planes
 *    int x, int y: coordinates in pixel unit used for the border tests
 *    int width, int height: picture size in pixels as passed by the caller
 *    int pitch: number of pixels to the next line
 *    compute_error_t eval: error evaluation function
 *    fame_motion_vector_t *mv: the vector, updated in place with the best
 *      half-sample shift and its error
 *    int unrestricted: non-zero widens the allowed area by 16 pixels
 *
 *  Notes:
 *    A neighbour replaces the current best only if its error is strictly
 *    lower, so on ties the earlier candidate wins.
 *
 *  Return value:
 *    None.
 */
static void find_half_subvector(fame_yuv_t **ref,
                                unsigned char *current,
                                unsigned char *shape,
                                int offset,
                                int edged_offset,
                                int x,
                                int y,
                                int width,
                                int height,
                                int pitch,
                                compute_error_t eval,
                                fame_motion_vector_t *mv,
                                int unrestricted)
{
  /* half-sample neighbours, in the order in which they are examined */
  static const int half_dx[8] = { -1,  0, +1, -1, +1, -1,  0, +1 };
  static const int half_dy[8] = { -1, -1, -1,  0,  0, +1, +1, +1 };
  int cand_err, best_err;
  int shift_x, shift_y;
  int neg_x_ok, pos_x_ok, neg_y_ok, pos_y_ok;
  int margin;
  int n;

  shift_x = 0;
  shift_y = 0;
  best_err = mv->error;
  margin = unrestricted << 4;

  /* which half-sample moves stay inside the allowed area */
  neg_y_ok = ((y+margin) << 1) + mv->dy > 0;
  neg_x_ok = ((x+margin) << 1) + mv->dx > 0;
  pos_x_ok = ((x-margin) << 1) + mv->dx < ((width-16) << 1);
  pos_y_ok = ((y-margin) << 1) + mv->dy < ((height-16) << 1);

  for(n = 0; n < 8; n++) {
    int hx = half_dx[n];
    int hy = half_dy[n];
    int cx, cy, where, plane;

    if(hx < 0 && !neg_x_ok) continue;
    if(hx > 0 && !pos_x_ok) continue;
    if(hy < 0 && !neg_y_ok) continue;
    if(hy > 0 && !pos_y_ok) continue;

    cx = mv->dx + hx;
    cy = mv->dy + hy;
    /* integer part -> offset in the plane, parity -> which plane */
    where = (cx >> 1) + (cy >> 1) * (pitch+32);
    plane = (cx & 1) + ((cy & 1) << 1);

    cand_err = eval(ref[plane]->y+edged_offset+where,
                    current + offset,
                    shape + offset,
                    pitch);

    if(cand_err < best_err) {
      best_err = cand_err;
      shift_x = hx;
      shift_y = hy;
    }
  }

  mv->dx += shift_x;
  mv->dy += shift_y;
  mv->error = best_err;
}
