/*
 * Copyright (c) 2001-2003 The Trustees of Indiana University.  
 *                         All rights reserved.
 * Copyright (c) 1998-2001 University of Notre Dame. 
 *                         All rights reserved.
 * Copyright (c) 1994-1998 The Ohio State University.  
 *                         All rights reserved.
 * 
 * This file is part of the LAM/MPI software package.  For license
 * information, see the LICENSE file in the top level directory of the
 * LAM/MPI source distribution.
 * 
 * $HEADER$
 *
 * $Id: ssi_rpi_gm_tiny.c,v 1.12.2.6 2004/03/12 01:19:12 vsahay Exp $
 *
 *	Function: - everything to do with tiny messages.  Functions in
 *	this file are marked with "<SENDER>" or "<RECEIVER>" to
 *	indicate which process they will be invoked in.
 */

#include <lam_config.h>

#include <stdlib.h>
#include <string.h>

#include <mpi.h>
#include <mpisys.h>
#include <rpisys.h>
#include <lamdebug.h>

#include <rpi_gm.h>
#include <rpi_gm_tiny.h>
#include <rpi_gm_dreg.h>
#include <rpi_gm_send_queue.h>
#include <rpi_gm_recv_queue.h>
#include <rpi_gm_util.h>
#include <rpi_gm_ack.h>
#include <rpi_gm_actions.h>


/*
 * private functions
 *
 * All three are file-local (static) and are defined further down in
 * this file.
 */

/* Hands the request's envelope to lam_ssi_rpi_gm_push_envelope(),
   registering tiny_send_env_callback as the callback.  Returns 0 or
   LAMERROR. */
static int send_tiny(MPI_Request req);

/* Installed as the recv_advance_fn of synchronous tiny sends; moves
   the request to the done state.  The env argument is not used. */
static int tiny_complete_send_request(MPI_Request req, char *env);

/* Callback handed to lam_ssi_rpi_gm_push_envelope() by send_tiny(). */
static void tiny_send_env_callback(struct gm_port *port, void *context, 
				   gm_status_t status);


/*
 * <SENDER>
 *
 * Start the send of a tiny message.
 *
 * The request is first marked active and flagged as advancing.  After
 * that, one of two things happens:
 *
 * - Immediate send: if the pending send queue is empty and at least
 * one send token is available, one token is consumed,
 * send_advance_fn is cleared (there is no further send action for
 * this request) and the message is handed to send_tiny().
 *
 * - Deferred send: otherwise send_advance_fn is set to send_tiny and
 * the request is appended to the send queue, behind whatever is
 * already waiting there.
 *
 * Returns 0 when the request was queued; otherwise returns whatever
 * send_tiny() returns (0 or LAMERROR).
 */
int 
lam_ssi_rpi_gm_tiny_send(MPI_Request req)
{
  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "send_tiny: sending request %p\n", req));

  /* From here on the request is in the progression engine */

  req->rq_state = LAM_RQSACTIVE;
  req->rq_rpi->is_advancing = 1;

  /* Immediate send: nothing is queued ahead of this request and a
     send token is free.  Take the token and send; there is no next
     send action after this one. */

  if (LAM_SSI_RPI_GM_SEND_QUEUE_EMPTY() &&
      lam_ssi_rpi_gm_stokens >= 1) {
    --lam_ssi_rpi_gm_stokens;
    lam_debug_cond((PUB(did), "decremented stokens 1: %d", lam_ssi_rpi_gm_stokens));
    req->rq_rpi->send_advance_fn = NULL;
    return send_tiny(req);
  }

  /* Deferred send: remember that the next action for this request is
     the actual send, and put it at the back of the queue.  The queue
     is deliberately not advanced here.  According to the original
     author's notes the queue is advanced every time a send token
     comes back, so nothing in it could make progress right now. */

  req->rq_rpi->send_advance_fn = send_tiny;
  LAM_SSI_RPI_GM_SEND_QUEUE_ADD(req);

  return 0;
}


/*
 * <SENDER>
 *
 * Start the send of a tiny synchronous message.
 *
 * This is the regular tiny send plus the bookkeeping needed to wait
 * for the receiver's ACK:
 *
 * - the request is registered with lam_ssi_rpi_gm_add_ack() for the
 * destination process,
 *
 * - recv_advance_fn is pointed at tiny_complete_send_request, which
 * is what finally moves the request to the done state,
 *
 * - the message itself goes through lam_ssi_rpi_gm_tiny_send().
 *
 * Returns 0 on success, or LAMERROR if either the ACK registration
 * or the send fails.
 */
int 
lam_ssi_rpi_gm_tiny_send_sync(MPI_Request req)
{
  /* Register for the ACK *before* sending.  Posting it first means
     the ACK is always expected when it shows up, so it never has to
     be handled as an unexpected receive.  This is fine even when the
     message ends up being queued instead of sent right away, since
     (per the original notes) ACK requests are queued in order. */

  if (lam_ssi_rpi_gm_add_ack(req->rq_proc->p_rpi, req) != 0)
    return LAMERROR;

  /* The arrival of the ACK is handled as a "receive" action, so the
     completion function is installed on the receive side of the
     request. */

  req->rq_rpi->recv_advance_fn = tiny_complete_send_request;

  /* The message itself is sent exactly like a non-synchronous one */

  return (lam_ssi_rpi_gm_tiny_send(req) != 0) ? LAMERROR : 0;
}


/*
 * <RECEIVER>
 *
 * Handle the arrival of a tiny message envelope.  For a tiny message
 * the payload is read from the memory directly following the
 * envelope (env + 1), so the whole message is available here.
 *
 * - Copies at most req->rq_packsize bytes of payload into
 * req->rq_packbuf; if the sender's length is larger than that, the
 * request is flagged with LAM_RQFTRUNC.
 *
 * - Fills in the MPI status from the envelope's rank, tag and length.
 *
 * - Clears recv_advance_fn, since the receiving part is finished.
 *
 * - For a synchronous send (C2CSSEND set in the envelope flags) an
 * ACK is sent through lam_ssi_rpi_gm_send_ack_done(); otherwise the
 * request is moved to the done state right here.
 *
 * Returns 0 on success, or LAMERROR if sending the ACK fails.
 */
int 
lam_ssi_rpi_gm_tiny_receive_env(struct lam_ssi_rpi_gm_envl *env, 
                                MPI_Request req)
{
  /* Move the payload into the user's packed buffer.  The original
     notes say the sender already converted the data ("sender made
     right"), so no endian handling is done on this side. */

  if (env->ge_env.ce_len > 0) {
    lam_debug_cond((lam_ssi_rpi_gm_did, 
                    "receive_tiny: len payload>0: %d -- "
                    "posted rcv for len %d",
                    env->ge_env.ce_len, req->rq_packsize));

    if (env->ge_env.ce_len > req->rq_packsize) {
      /* More data than the posted receive can hold: copy what fits
         and flag the truncation */
      lam_memcpy(req->rq_packbuf, env + 1, req->rq_packsize);
      req->rq_flags |= LAM_RQFTRUNC;
    } else {
      lam_memcpy(req->rq_packbuf, env + 1, env->ge_env.ce_len);
    }
  }

  /* NOTE(review): the status is filled with the sender's length even
     when the copy above was truncated -- confirm this is intended. */

  lam_ssi_rpi_fill_mpi_status(req, env->ge_env.ce_rank, env->ge_env.ce_tag, 
                              env->ge_env.ce_len);
  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "receive_tiny: filled status: rank:%d, tag:%d, len:%d",
                  env->ge_env.ce_rank, env->ge_env.ce_tag, 
                  env->ge_env.ce_len));
  lam_ssi_rpi_gm_haveadv = 1;

  /* Nothing more to receive for this request */

  req->rq_rpi->recv_advance_fn = NULL;

  /* Ordinary (non-synchronous) send: the request is complete now */

  if ((env->ge_env.ce_flags & C2CSSEND) == 0) {
    req->rq_state = LAM_RQSDONE;
    --lam_rq_nactv;
    return 0;
  }

  /* Synchronous send: the sender is waiting for an ACK.  The request
     is not marked done here; that is left to the ACK path. */

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "receive_tiny: sending an ACK because this was an ssend"));
  if (lam_ssi_rpi_gm_send_ack_done(req, env) != 0)
    return LAMERROR;
  lam_debug_cond((lam_ssi_rpi_gm_did, "receive_tiny: ACK queued up"));

  return 0;
}


/************************************************************************/

/*
 * <SENDER>
 *
 * Thin wrapper around the actual send: by the time this is called all
 * setup has been done, so the request's envelope is simply pushed
 * with tiny_send_env_callback registered as the callback.
 *
 * Returns 0 on success, or LAMERROR if pushing the envelope fails.
 */
static int 
send_tiny(MPI_Request req)
{
  /* Note for synchronous sends: the envelope that goes out carries
     *my* rank in the communicator.  To match the ACK that comes back
     later, the request's envelope must instead hold the *receiver's*
     rank and have the C2CACK flag set.  That rewrite cannot happen
     here, because the envelope must not be modified once it has been
     handed to the gm library.  It is done in tiny_send_env_callback()
     instead. */

  return (lam_ssi_rpi_gm_push_envelope(req, tiny_send_env_callback) != 0)
    ? LAMERROR : 0;
}


/*
 * <SENDER>
 *
 * Completion function for synchronous tiny sends.  It is installed as
 * recv_advance_fn by lam_ssi_rpi_gm_tiny_send_sync() and, per the
 * original notes, runs once the ACK has been received.  The request
 * is moved to the done state, the RPI is flagged as having advanced,
 * and the count of active requests is decremented.
 *
 * The env argument carries no data and is ignored.
 *
 * Always returns 0.
 */
static int 
tiny_complete_send_request(MPI_Request req, char *env)
{
  (void) env;

  lam_debug_cond((lam_ssi_rpi_gm_did, "<tiny>completed send request"));

  /* Progress was made, and this request is finished */

  lam_ssi_rpi_gm_haveadv = 1;
  req->rq_state = LAM_RQSDONE;
  --lam_rq_nactv;

  return 0;
}


/*
 * <SENDER>
 *
 * Callback registered by send_tiny() for the envelope send.  For a
 * tiny message the envelope is the entire message, so once it has
 * gone out:
 *
 * - a non-synchronous request is finished and is moved to the done
 * state;
 *
 * - a synchronous request stays active.  It was already queued to
 * receive an ACK, so all that is done here is to rewrite its
 * envelope so that the incoming ACK will match it.
 *
 * NOTE(review): "req" is not declared in this function; presumably
 * LAM_SSI_RPI_GM_SEND_CALLBACK_START derives it from the context
 * argument -- confirm against the macro definition.
 */
static void 
tiny_send_env_callback(struct gm_port *port, void *context, gm_status_t status)
{
  LAM_SSI_RPI_GM_SEND_CALLBACK_START;

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "<tiny>send_env_callback: all done! %p\n", req));

  if (req->rq_rpi->cq_envbuf->ge_env.ce_flags & C2CSSEND) {

    /* Synchronous send: apply the envelope changes that send_tiny()
       had to defer.  Store the peer's rank and add the C2CACK flag
       so that this request compares equal to the ACK when it
       arrives.  C2CSSEND stays set.  The request is completed later,
       on the ACK path. */

    req->rq_rpi->cq_envbuf->ge_env.ce_rank = req->rq_rank;
    req->rq_rpi->cq_envbuf->ge_env.ce_flags |= C2CACK;
  } else {

    /* Non-synchronous send: nothing else is pending, so the request
       is done. */

    lam_debug_cond((lam_ssi_rpi_gm_did, 
                    "<tiny>send_env_callback: not a sync, so done!"));
    req->rq_state = LAM_RQSDONE;
    --lam_rq_nactv;
  }

  /* Per the original comment, this hands the send token back and
     marks that this c2c RPI has advanced */

  LAM_SSI_RPI_GM_SEND_CALLBACK_FINISH;
}
