int gather_is_short(int count, MPI_Datatype dtype, MPI_Comm comm)
{
int size;
MPI_Pack_size(count, dtype, comm, &size);
if (size < 2048) {
return(1);
} else {
return(0);
}
}
int MPI_Gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf,
int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
if (gather_is_short(scount, sdtype, comm)) {
gather_short(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
} else {
gather_long(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
}
return(MPI_SUCCESS);
}
int gather_long(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf,
int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
MPI_Status status;
MPI_Aint extent;
int i, nprocs, myrank, incr;
char *p;
MPI_Comm_rank(comm, &myrank);
MPI_Comm_size(comm, &nprocs);
if (myrank != root) {
MPI_Send(sbuf, scount, sdtype, root, IMPI_GATHER_TAG, comm);
}
MPI_Type_extent(rdtype, &extent);
incr = extent * rcount;
for (i = 0, p = (char *) rbuf; i < nprocs; i++, p += incr) {
if (i == myrank) {
MPI_Sendrecv(sbuf, scount, sdtype, i, IMPI_GATHER_TAG,
p, rcount, rdtype, i, IMPI_GATHER_TAG, comm, &status);
} else {
MPI_Recv(p, rcount, rdtype, i, IMPI_GATHER_TAG, comm, &status);
}
}
return(MPI_SUCCESS);
}
int gather_short(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf,
int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm)
{
MPI_Status status;
int myrank, nmasters, packsize, vnum, rootnum, nmasters;
int mask, nprocs, count, size;
MPI_Comm_rank(comm, &myrank);
MPI_Comm_size(comm, &nprocs);
MPI_Pack_size(scount, sdtype, comm, &packsize);
if (is_master(myrank, comm)) {
allocate a temporary buffer tmpbuf of size nprocs*packsize;
}
nmasters = num_masters(comm);
/* local phase */
if (are_local(myrank, root, comm)) {
gather the send buffers of the local processes into the
root's receive buffer;
} else {
gather send buffers at the local master into tmpbuf;
/* At this point the master must have a buffer tmpbuf
* containing a concatenation in rank order of the
* local processes packed send buffers.
*/
}
/* global phase */
if ((myrank == root) || (is_master(myrank, comm)
&& !are_local(myrank, root, comm))) {
if (nmasters <= MAXLINEARGATHER) {
/* linear gather to root */
if (myrank == root) {
for (i = 0, size = 0; i < nmasters; i++) {
if (i == local_master_num(root, comm)) {
continue; /* skip root's node */
}
MPI_Recv(tmpbuf+size, nprocs*packsize, MPI_BYTE,
master_rank(i, comm), IMPI_GATHER_TAG, comm, &status);
MPI_Get_count(status, MPI_BYTE, &count);
size += count;
} else {
size = num_local_to_rank(myrank, comm) * packsize;
MPI_Send(tmpbuf, size, MPI_BYTE, root, IMPI_GATHER_TAG, comm);
}
} else {
/* tree gather to root */
mynum = local_master_num(myrank, comm);
rootnum = master_num(root, comm);
vnum = (mynum - rootnum + nmasters) % nmasters;
if (myrank == root) {
size = 0;
} else {
size = num_local_to_rank(myrank, comm) * packsize;
}
for (mask = 1; mask < nprocs; mask <<= 1) {
if (vnum & mask) {
peer = master_rank(((vnum & ~mask) + rootnum) % nmasters, comm);
if (are_local(peer, root, comm)) {
peer = root;
}
MPI_Send(tmpbuf, size, MPI_BYTE, peer, IMPI_GATHER_TAG, comm);
break;
}
else {
peer = vnum | mask;
if (peer >= nmasters) continue;
peer = master_rank((peer + rootnum) % nmasters, comm);
if (are_local(peer, root, comm)) {
peer = root;
}
MPI_Recv(tmpbuf+size, nprocs*packsize, MPI_BYTE, peer,
IMPI_GATHER_TAG, comm, &status);
MPI_Get_count(status, MPI_BYTE, &count);
size += count;
}
}
}
}
/* local phase */
if (myrank == root) {
/* tmpbuf contains concatenated in order of master rank the
* concatenations of the process send buffers created in the first
* local phase
*/
unpack the data in tmpbuf into the receive buffer;
}
if (is_master(myrank, comm)) {
free(tmpbuf);
}
return(MPI_SUCCESS);
}