/*
 * Copyright (c) 2001-2003 The Trustees of Indiana University.  
 *                         All rights reserved.
 * Copyright (c) 1998-2001 University of Notre Dame. 
 *                         All rights reserved.
 * Copyright (c) 1994-1998 The Ohio State University.  
 *                         All rights reserved.
 * 
 * This file is part of the LAM/MPI software package.  For license
 * information, see the LICENSE file in the top level directory of the
 * LAM/MPI source distribution.
 * 
 * $HEADER$
 *
 * $Id: ssi_rpi_gm_recv_events.c,v 1.9.2.4 2004/03/12 01:19:12 vsahay Exp $
 *
 *	Function:	- GM low-level routines
 */

#include <lam_config.h>

#include <stdlib.h>
#include <string.h>

#include <rpisys.h>
#include <lamdebug.h>

#include <rpi_gm.h>
#include <rpi_gm_recv_events.h>
#include <rpi_gm_recv_queue.h>
#include <rpi_gm_unexpected.h>
#include <rpi_gm_dreg.h>
#include <rpi_gm_tiny.h>
#include <rpi_gm_short.h>
#include <rpi_gm_long.h>
#include <rpi_gm_util.h>


/*
 * private functions
 *
 * receive_envelope: process one envelope that arrived from src_proc.
 * The envelope is handled as an ACK, as a match for an already-posted
 * receive, or as an unexpected message (see the definition later in
 * this file).
 */
static int receive_envelope(struct lam_ssi_rpi_proc *src_proc, 
			    struct lam_ssi_rpi_gm_envl *env);


/*
 * Handle one receive event that arrived on the GM port.
 *
 * The sender is looked up in lam_ssi_rpi_gm_map from the node/port ids
 * carried in the event.  The payload is then treated as one of three
 * things, tested in this order:
 *
 *   1. the body of an unexpected short message
 *      (src_proc->cp_bmsg != NULL),
 *   2. the body of an expected short or long message
 *      (src_proc->cp_current_recv_req != NULL), or
 *   3. a new envelope.
 *
 * Afterwards a replacement receive buffer of the matching kind is
 * handed back to GM, except after the body of a long message, where no
 * replacement is posted.
 *
 * event: the GM receive event to process
 * fast:  if nonzero, the payload is first copied from the event's
 *        message area into the event's buffer with
 *        gm_memorize_message()
 *
 * Returns 0 on success, LAMERROR on failure.
 */
int
lam_ssi_rpi_gm_recv_event(gm_recv_event_t *event, int fast)
{
  int postsize = 0;
  char *postbuf = NULL;
  struct _proc *src_p;
  struct lam_ssi_rpi_proc *src_proc;
  struct lam_ssi_rpi_req *gmreq;
  char *received_data;

  /* Pull the fields we need out of the event, converting from network
     to host representation */

  int recv_sender_node_id = gm_ntoh_u16(event->recv.sender_node_id);
  int recv_sender_port_id = gm_ntoh_u8(event->recv.sender_port_id);
  unsigned long recv_length = gm_ntoh_u32(event->recv.length);
  int recv_size = gm_ntoh_u8(event->recv.size);
  void *recv_message = gm_ntohp(event->recv.message);
  void *recv_buffer = gm_ntohp(event->recv.buffer);

  /* Look up which process the receive came from */

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "%d: getting from gm_map[%d, %d] ==> %d", 
                  lam_myproc->p_gps.gps_grank,
                  recv_sender_node_id,
                  recv_sender_port_id,
                  LAM_SSI_RPI_GM_MAP_HASH(recv_sender_node_id,
                                          recv_sender_port_id)));
  src_p = 
    lam_ssi_rpi_gm_map[LAM_SSI_RPI_GM_MAP_HASH(recv_sender_node_id,
                                               recv_sender_port_id)];

  /* Do not dereference an empty map slot.  NOTE(review): this assumes
     that slots without a process hold NULL -- confirm against the code
     that fills lam_ssi_rpi_gm_map. */

  if (src_p == NULL) {
    lam_debug_cond((lam_ssi_rpi_gm_did,
                    "%d: gm_map[%d, %d] has no process entry",
                    lam_myproc->p_gps.gps_grank,
                    recv_sender_node_id,
                    recv_sender_port_id));
    return LAMERROR;
  }
  lam_debug_cond((lam_ssi_rpi_gm_did, "%d: gm_map[%d, %d] is rank %d", 
                  lam_myproc->p_gps.gps_grank,
                  recv_sender_node_id,
                  recv_sender_port_id,
                  src_p->p_gps.gps_grank));

  /* Get source process */

  src_proc = src_p->p_rpi;
#if 1
  /* JMS This is horrible for envelopes -- guarantees that we add
     latency.  Needs to be optimized. */
  if (fast) {
    gm_memorize_message(recv_message, recv_buffer, recv_length);
  }
  received_data = recv_buffer;
#else
  /* Optimize: use the message directly, and don't copy it to the
     buffer */
  if (fast)
    received_data = recv_message;
  else
    received_data = recv_buffer;
#endif

  /* recv_length is an unsigned long, so it must be printed with %lu;
     %p requires a void pointer */

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "%d: got something size=%d length=%lu "
                  "(fast: %d), buffer %p, event buffer %p", 
                  lam_myproc->p_gps.gps_grank,
                  recv_size,
                  recv_length,
                  fast,
                  (void *) received_data, recv_buffer));

  /* It's either an envelope or the second part of a message that we
     have already received the envelope for. 

     First case: it's the second part (short or long body) of a
     message that we have already received the envelope for.  

     Where we put this incoming message depends on whether the message
     is expected or unexpected.  If it is expected, then we have a
     matching request, and it has been cached in
     lam_ssi_rpi_proc->cp_current_recv_req -- recv_advance_fn will be
     set to what to do next.  If it is unexpected, the cbuf_msg
     unexpected buffer is cached in lam_ssi_rpi_proc->cp_bmsg.  Note:
     we will only be in this situation for unexpected messages when
     they are *short* -- unexpected tiny messages are fully received
     in one shot, unexpected long messages only send an envelope. */

  /* Receive the body of an unexpected short message.  The allocated
     unexpected buffer is guaranteed to be large enough. */

  if (src_proc->cp_bmsg != NULL) {
    if (lam_ssi_rpi_gm_unexpected_receive_short(src_proc, received_data) != 0)
      return LAMERROR;

    /* Post a new short receive message buffer */

    postbuf = lam_ssi_rpi_gm_dma_short_malloc();
    postsize = lam_ssi_rpi_gm_data_min_size_for_length;
  }

  /* Receive the body of an expected short or long message */

  else if (src_proc->cp_current_recv_req != NULL) {
    gmreq = src_proc->cp_current_recv_req->rq_rpi;
    lam_debug_cond((lam_ssi_rpi_gm_did, 
                    "recv_events: doing part 2 of a receive: req %p", 
                    src_proc->cp_current_recv_req));
    if (gmreq->recv_advance_fn == NULL ||
        gmreq->recv_advance_fn(src_proc->cp_current_recv_req, 
                               received_data) != 0) {
      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "recv_event: recv_advance_fn returns LAMERROR"));
      return LAMERROR;
    }

    /* If we just received a short message, setup to post a new
       dma_short buffer.  If it was a long, then we don't post a new
       dma_short buffer. */

    if (recv_size == lam_ssi_rpi_gm_data_min_size_for_length) {
      postbuf = lam_ssi_rpi_gm_dma_short_malloc();
      postsize = lam_ssi_rpi_gm_data_min_size_for_length;
    } else {
      postbuf = NULL;
      postsize = 0;
    }
  }

  /* Next case: a new envelope.  Any nonzero return is a failure, the
     same convention used for recv_advance_fn above. */

  else {
    lam_debug_cond((lam_ssi_rpi_gm_did,
                    "recv_events: receiving an envelope"));
    if (receive_envelope(src_proc, 
                         (struct lam_ssi_rpi_gm_envl*) received_data) != 0)
      return LAMERROR;

    postbuf = (char *) lam_ssi_rpi_gm_dma_env_malloc();
    postsize = lam_ssi_rpi_gm_env_min_size_for_length;
  } 

  /* We know that we have a receive token, because we just finished a
     receive.  So we can just post a new receive buffer.  Nothing is
     posted when postbuf is NULL (after a long body, or when the buffer
     allocation above returned NULL). */

  if (postbuf != NULL) {
    gm_provide_receive_buffer(lam_myproc->p_rpi->cp_gm_port,
                              postbuf,
                              postsize,
                              LAM_SSI_RPI_GM_PRIORITY);
    lam_debug_cond((lam_ssi_rpi_gm_did,
                    "posted new receive buffer %p, size=%d", 
                    postbuf, postsize));
  }

  return 0;
}


/*
 * An envelope has been received.  Process it, and find (or create) a
 * matching request.  
 *
 * - If this is only the first part of the message (i.e., the short or
 * long protocol), put the matching request in
 * src_proc->cp_current_recv_req so that the next receive will go
 * straight to that request.
 *
 * - If the message was sent with the synchronous bit enabled, send
 * back an ACK.
 *
 * - If this is a tiny (non-synchronous) message, the message is done.
 *
 * This is an MPI_Request->recv_advance_fn function.  Therefore, it
 * needs to reset MPI_Request->recv_advance_fn when it completes.  
 */
static int
receive_envelope(struct lam_ssi_rpi_proc *src_proc, 
                 struct lam_ssi_rpi_gm_envl *env)
{
  int ret;
  int want_remove;
  MPI_Request req;
  struct lam_ssi_rpi_gm_reqlist_item_t *rli;

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "receive_envelope: checking for pending "
                  "recvs on lam_ssi_rpi_proc %p", 
                  src_proc));
  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "receive_envelope: rank %d, tag %d, cid %d, "
                  "flags 0x%x, len %d, seq %d",
                  env->ge_env.ce_rank,
                  env->ge_env.ce_tag,
                  env->ge_env.ce_cid,
                  env->ge_env.ce_flags,
                  env->ge_env.ce_len,
                  env->ge_env.ce_seq));
  
  /* Is this an ACK?  If so, it must be on the pending ACK list.  Go
     handle it. */

  if ((env->ge_env.ce_flags & C2CACK) != 0) {
    lam_debug_cond((lam_ssi_rpi_gm_did, "receive_envelope: got an ACK!"));
    want_remove = 0;

    /* NOTE: By definition, ACKs will never be unexepcted.  So we just
       need to find the corresponding ACK on the expected-ACK-list,
       and send the rest of the body.  Note also that they may come
       out of order -- since ACKs are only sent when the actual
       receive is posted, the sending order in the user's program may
       be totally different than the receiving order (in the user's
       program).  For example:

       Sender:
       MPI_Send(long_buf_1, ..., dest, tag1, comm1);
       MPI_Send(long_buf_2, ..., dest, tag2, comm2);

       Receiver:
       MPI_Recv(long_buf_2, ..., src, tag2, comm2, ...);
       MPI_Recv(long_buf_1, ..., src, tag1, comm1, ...);

       Such a sequence would cause the ACK for the message associated
       with tag2 to be sent before the ACK for the message associated
       with tag 1. */

    rli = PUB(reqlist_match)(&(src_proc->cp_pending_acks), env);
    lam_debug_cond((lam_ssi_rpi_gm_did, "recv_envelope: found ACK req %p",
                    rli->req));
    req = rli->req;

    /* Remove it from the list of pending ACKs */

    PUB(reqlist_rm_item)(&(src_proc->cp_pending_acks), rli);
    lam_debug_cond((lam_ssi_rpi_gm_did, 
                    "recv_envelope: got ACK with env: len %d, seq %d",
                    env->ge_env.ce_len, env->ge_env.ce_seq));

    /* Process it */

    ret = req->rq_rpi->recv_advance_fn(req, (char*) env);

    /* Now we're done with the envelope.  Return it to the pool. */

    lam_ssi_rpi_gm_dma_env_free(env);

    return ret;
  }

  /* If this is not an ACK, it's a regular envelope.  Was a receive
     for this message already posted? */

  else if ((rli = lam_ssi_rpi_gm_reqlist_match(&(src_proc->cp_pending_recvs), 
					       env)) != NULL) {
    lam_debug_cond((lam_ssi_rpi_gm_did, "receiving env: found match!"));
    req = rli->req;
    want_remove = 1;
    lam_ssi_rpi_gm_fill_wildcards(req, &(env->ge_env));

    /* First look at the matched request and see if it was a probe.
       If it was, then mark the request done and turn envelope into an
       unexpected message. */

    if (req->rq_type == LAM_RQIPROBE) {
      req->rq_state = LAM_RQSDONE;
      lam_ssi_rpi_fill_mpi_status(req, env->ge_env.ce_rank, env->ge_env.ce_tag, 
				   env->ge_env.ce_len);
      lam_ssi_rpi_gm_haveadv = 1;
      --lam_rq_nactv;

      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "receiving env: matched a probe -- "
                      "now unexpected env"));
      ret = lam_ssi_rpi_gm_unexpected_receive_env(src_proc, env);
    }
    
    /* Now look at the envelope, and dispatch accordingly.  Hence, we
       dispatch on what the message actually is, not on what its
       matched request says it should be -- this takes care of the
       [erroneous but possible] case where the received message is
       longer than the posted request. */
    /* NOTE: The ACK received by the <SENDER> from the <RECEIVER> in a
       long message is handled in the above ACK-handling code; hence
       it isn't a possibily listed below. */

    /* <RECIEVER> receipt of the actual body from the <SENDER> on a
       long message.  This routine will take care of removing this
       request from the pending receive queues because req->rq_rank
       may be MPI_ANY_SOURCE, but receive_env_first will have already
       removed this envelope from all other receive queues except for
       the one on this proc.  Hence, the general want_remove code
       below is not sufficient, and we just do it in
       receive_env_second.  This is first to ensure that if the
       env->ge_env.ce_len was shortened because the message was truncated, we
       will finish the long protocol -- not dispatch into tiny or
       short receives. */

    else if ((env->ge_env.ce_flags & C2C2ND) != 0) {
      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "receiving env: receive second long"));
      ret = lam_ssi_rpi_gm_long_receive_env_second(env, req);
      want_remove = 0;
    }

    /* Receipt of a tiny message */

    else if (env->ge_env.ce_len <= lam_ssi_rpi_gm_tinymsglen) {
      lam_debug_cond((lam_ssi_rpi_gm_did,
                      "receiving env: receive tiny env"));
      ret = lam_ssi_rpi_gm_tiny_receive_env(env, req);
    }

    /* Receipt of a short message */

    else if (env->ge_env.ce_len <= lam_ssi_rpi_gm_shortmsglen) {
      lam_debug_cond((lam_ssi_rpi_gm_did, 
                      "receiving env: receive short env"));
      ret = lam_ssi_rpi_gm_short_receive_env(env, req);
    }

    /* Initial <RECEIVER> receipt of an envelope from the <SENDER> of
       a long message.  Since receiving a long message may actually
       involve multiple receives on the same request, we do *not* want
       to remove the request from the pending list of receives here.
       If necessary, the long protocol code will take care of removing
       this request from relevant receive queues (i.e., if this is an
       MPI_ANY_SOURCE, we need to remove it from all by one receive
       queue, or if the <RECEIVER> posted a zero-length buffer, we
       remove it from all queues because we won't receive anything
       from the <SENDER>). */

    else {
      lam_debug_cond((lam_ssi_rpi_gm_did, 
		      "receiving env: receive first long"));
      ret = lam_ssi_rpi_gm_long_receive_env_first(env, req);
      want_remove = 0;
    }
  }

  /* No, a receive for this envelope was not already posted -- this is
     an unexpected message.  Hence, by definition, it's not on any of
     the receive queues, and so we should not bother trying to remove
     it. */

  else {
    lam_debug_cond((lam_ssi_rpi_gm_did, 
		    "receiving env: receive unexpected env"));
    ret = lam_ssi_rpi_gm_unexpected_receive_env(src_proc, env);
    want_remove = 0;
  }

  /* Now we're done with the envelope.  Return it to the pool. */

  lam_ssi_rpi_gm_dma_env_free(env);

  if (ret != 0)
    return LAMERROR;

  /* If the posted receive is MPI_ANY_SOURCE, go remove this from the
     pending_recvs queues on all the procs.  Otherwise, just remove it
     from the pending_recvs queue on the source proc.  Ensure to save
     the req before we remove it from the list because reqlist_rm_*
     will free(rli). */

  if (want_remove == 1) {
    lam_debug_cond((lam_ssi_rpi_gm_did, 
		    "recv_events: removing req from recv queues"));
    if (rli->req->rq_rank == MPI_ANY_SOURCE)
      ret = lam_ssi_rpi_gm_rm_read_any_src(req, NULL);
    else {
      ret = lam_ssi_rpi_gm_rm_read(src_proc, rli);
    }
    if (ret != 0)
      return LAMERROR;
  }

  lam_debug_cond((lam_ssi_rpi_gm_did, "receiving env: done"));
  return ret;
}
