next up previous contents
Next: Gatherv Up: Collectives Previous: Bcast

Gather

int gather_is_short(int count, MPI_Datatype dtype, MPI_Comm comm)
{
  int size;

  MPI_Pack_size(count, dtype, comm, &size);
  if (size < 2048) {
    return(1);
  } else {
    return(0);
  }
}

int MPI_Gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, 
               int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
  if (gather_is_short(scount, sdtype, comm)) {
    gather_short(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
  } else {
    gather_long(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
  }

  return(MPI_SUCCESS);
}

int gather_long(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, 
                int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
  MPI_Status status;
  MPI_Aint extent;
  int i, nprocs, myrank, incr;
  char *p;

  MPI_Comm_rank(comm, &myrank);
  MPI_Comm_size(comm, &nprocs);

  if (myrank != root) {
    MPI_Send(sbuf, scount, sdtype, root, IMPI_GATHER_TAG, comm);
  }

  MPI_Type_extent(rdtype, &extent);
  incr = extent * rcount;

  for (i = 0, p = (char *) rbuf; i < nprocs; i++, p += incr) {
    if (i == myrank) {
      MPI_Sendrecv(sbuf, scount, sdtype, i, IMPI_GATHER_TAG, 
                   p, rcount, rdtype, i, IMPI_GATHER_TAG, comm, &status);
    } else {
      MPI_Recv(p, rcount, rdtype, i, IMPI_GATHER_TAG, comm, &status);
    }
  }

  return(MPI_SUCCESS);
}

int gather_short(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, 
                 int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
  MPI_Status status;
  int myrank, nmasters, packsize, vnum, rootnum, nmasters;
  int mask, nprocs, count, size;

  MPI_Comm_rank(comm, &myrank);
  MPI_Comm_size(comm, &nprocs);
  MPI_Pack_size(scount, sdtype, comm, &packsize);

  if (is_master(myrank, comm)) {
    allocate a temporary buffer tmpbuf of size nprocs*packsize;
  }

  nmasters = num_masters(comm);

  /* local phase */
  if (are_local(myrank, root, comm)) {
    gather the send buffers of the local processes into the
      root's receive buffer;
  } else {
    gather send buffers at the local master into tmpbuf;
    /* At this point the master must have a buffer tmpbuf
     * containing a concatenation in rank order of the
     * local processes packed send buffers.
     */
  }

  /* global phase */
  if ((myrank == root) || (is_master(myrank, comm) 
      && !are_local(myrank, root, comm))) {

    if (nmasters <= MAXLINEARGATHER) {
      /* linear gather to root */
      if (myrank == root) {
        for (i = 0, size = 0; i < nmasters; i++) {
          if (i == local_master_num(root, comm)) {
            continue;            /* skip root's node */
          }

          MPI_Recv(tmpbuf+size, nprocs*packsize, MPI_BYTE, 
                   master_rank(i, comm), IMPI_GATHER_TAG, comm, &status);
          MPI_Get_count(status, MPI_BYTE, &count);
          size += count;
      } else {
        size = num_local_to_rank(myrank, comm) * packsize;
        MPI_Send(tmpbuf, size, MPI_BYTE, root, IMPI_GATHER_TAG, comm); 
      }
    } else {
      /* tree gather to root */
      mynum = local_master_num(myrank, comm);
      rootnum = master_num(root, comm);
      vnum = (mynum - rootnum + nmasters) % nmasters;

      if (myrank == root) {
          size = 0;
      } else {
        size = num_local_to_rank(myrank, comm) * packsize;
      }

      for (mask = 1; mask < nprocs; mask <<= 1) {
        if (vnum & mask) {
          peer = master_rank(((vnum & ~mask) + rootnum) % nmasters, comm);
          if (are_local(peer, root, comm)) {
            peer = root;
          }

          MPI_Send(tmpbuf, size, MPI_BYTE, peer, IMPI_GATHER_TAG, comm);
          break;
        }
        else {
          peer = vnum | mask;
          if (peer >= nmasters) continue;
          peer = master_rank((peer + rootnum) % nmasters, comm);
          if (are_local(peer, root, comm)) {
            peer = root;
          }

          MPI_Recv(tmpbuf+size, nprocs*packsize, MPI_BYTE, peer, 
                   IMPI_GATHER_TAG, comm, &status);
          MPI_Get_count(status, MPI_BYTE, &count);
          size += count;
        }
      }
    }
  }

  /* local phase */
  if (myrank == root) {
    /* tmpbuf contains concatenated in order of master rank the 
     * concatenations of the process send buffers created in the first
     * local phase
     */
    unpack the data in tmpbuf into the receive buffer;
  }

  if (is_master(myrank, comm)) {
    free(tmpbuf);
  }
  return(MPI_SUCCESS);
}



IMPI Protocol ver 0.0
DRAFT March 22, 1999