/*
 * alltoall_is_short - choose between the short- and long-message
 * all-to-all algorithms.
 *
 * count: number of elements each process sends to each peer
 * dtype: datatype of those elements
 * comm:  communicator; currently unused, kept for the existing interface
 *
 * Returns 1 when count times the size reported by MPI_Type_size for
 * dtype is below the short-message limit, 0 otherwise.
 */
int alltoall_is_short(int count, MPI_Datatype dtype, MPI_Comm comm)
{
    /* Per-destination payload limit below which the short algorithm is used. */
    enum { ALLTOALL_SHORT_LIMIT = 2048 };
    int size;

    (void) comm;

    MPI_Type_size(dtype, &size);

    /* Multiply in a wider type: count * size in plain int can overflow for
     * large counts and wrap to a value that looks "short". */
    return ((long long) count * size < ALLTOALL_SHORT_LIMIT) ? 1 : 0;
}
/* The two algorithm implementations are defined further down in this file;
 * declare them here so the calls below are checked against their real
 * seven-argument signatures. */
int alltoall_short(void *sbuf, int scount, MPI_Datatype sdtype,
                   void *rbuf, int rcount, MPI_Datatype rdtype,
                   MPI_Comm comm);
int alltoall_long(void *sbuf, int scount, MPI_Datatype sdtype,
                  void *rbuf, int rcount, MPI_Datatype rdtype,
                  MPI_Comm comm);

/*
 * MPI_Alltoall - dispatch an all-to-all exchange to the short- or
 * long-message algorithm, as selected by alltoall_is_short() from the
 * send count and send datatype.
 *
 * sbuf/scount/sdtype: send buffer, per-destination element count, datatype
 * rbuf/rcount/rdtype: receive buffer, per-source element count, datatype
 * comm:               communicator over which the exchange takes place
 *
 * Returns whatever the selected algorithm returns.
 */
int MPI_Alltoall(void *sbuf, int scount, MPI_Datatype sdtype,
                 void *rbuf, int rcount, MPI_Datatype rdtype,
                 MPI_Comm comm)
{
    /* All-to-all has no root: the helpers take exactly these seven
     * arguments, and the result must be handed back to the caller. */
    if (alltoall_is_short(scount, sdtype, comm)) {
        return alltoall_short(sbuf, scount, sdtype,
                              rbuf, rcount, rdtype, comm);
    }
    return alltoall_long(sbuf, scount, sdtype,
                         rbuf, rcount, rdtype, comm);
}
/*
 * alltoall_long - all-to-all exchange for long messages.
 *
 * Posts one nonblocking receive and one nonblocking send for every rank
 * in comm (the calling rank included), then waits for all 2 * nprocs
 * requests to complete.
 *
 * sbuf/scount/sdtype: send buffer; the block for rank i starts at
 *                     i * scount * extent(sdtype) bytes into sbuf
 * rbuf/rcount/rdtype: receive buffer; the block from rank i lands at
 *                     i * rcount * extent(rdtype) bytes into rbuf
 * comm:               communicator over which the exchange takes place
 *
 * Returns MPI_ERR_OTHER if the request/status arrays cannot be
 * allocated, otherwise the result of MPI_Waitall (MPI_SUCCESS when the
 * exchange completed).
 */
int alltoall_long(void *sbuf, int scount, MPI_Datatype sdtype,
                  void *rbuf, int rcount, MPI_Datatype rdtype,
                  MPI_Comm comm)
{
    MPI_Request *reqs;
    MPI_Status *stats;
    MPI_Aint sextent, rextent;
    int i, nprocs, rc;

    MPI_Comm_size(comm, &nprocs);
    MPI_Type_extent(sdtype, &sextent);
    MPI_Type_extent(rdtype, &rextent);

    /* Two requests (receive + send) per peer. */
    reqs = malloc(2 * (size_t) nprocs * sizeof *reqs);
    stats = malloc(2 * (size_t) nprocs * sizeof *stats);
    if (reqs == NULL || stats == NULL) {
        free(reqs);
        free(stats);
        return(MPI_ERR_OTHER);
    }

    for (i = 0; i < nprocs; i++) {
        /* Widen before multiplying so the byte offset is computed in
         * MPI_Aint rather than overflowing in int. */
        MPI_Irecv((char *) rbuf + (MPI_Aint) i * rcount * rextent, rcount,
                  rdtype, i, IMPI_ALLTOALL_TAG, comm, &reqs[2*i]);
        MPI_Isend((char *) sbuf + (MPI_Aint) i * scount * sextent, scount,
                  sdtype, i, IMPI_ALLTOALL_TAG, comm, &reqs[2*i + 1]);
    }

    rc = MPI_Waitall(2 * nprocs, reqs, stats);

    free(reqs);
    free(stats);
    return(rc);
}
/*
 * alltoall_short - all-to-all exchange for short messages, organised as a
 * local phase followed by a global phase in which packed buffers travel
 * between masters (see the comment on the global phase below).
 *
 * Only scount, sdtype and comm are referenced by the C statements visible
 * here; sbuf, rbuf, rcount and rdtype are presumably consumed by the steps
 * that are still written as prose.
 *
 * Returns MPI_SUCCESS; the return codes of the MPI calls are not inspected.
 *
 * NOTE(review): several steps below are prose placeholders rather than C
 * statements, so this function does not compile as written.  tmpbuf is
 * used but never declared in this function.
 */
int alltoall_short(void *sbuf, int scount, MPI_Datatype sdtype,
void *rbuf, int rcount, MPI_Datatype rdtype,
MPI_Comm comm)
{
MPI_Status status;
int i, j, myrank, nmasters, packsize, size;
int rootmaster, nroot;
MPI_Comm_rank(comm, &myrank);
/* packsize is the MPI_Pack_size result for scount elements of sdtype;
 * it is the per-block factor used to size tmpbuf below. */
MPI_Pack_size(scount, sdtype, comm, &packsize);
/* local phase */
do local all to all exchange;
/* global phase */
/* This phase rotates around the nodes treating each one in turn as
* the root. The root node master collects in turn the buffers
* destined for each other node and sends them in a single transfer
* to the node where a local operation scatters them to the local
* destination processes.
*/
nmasters = num_masters(comm);
for (i = 0; i < nmasters; i++) {
rootmaster = master_rank(i, comm);
nroot = num_local_to_master(i, comm);
if (are_local(myrank, rootmaster, comm)) {
/* This process is on the current root node: take part in one gather
 * per other node j. */
for (j = 0; j < nmasters; j++) {
/* Skip the root node itself; presumably its own traffic was handled
 * by the local phase -- TODO confirm. */
if (i == j) continue;
if (myrank == rootmaster) {
/* One packed block per (process on node i, process on node j) pair. */
size = nroot * num_local_to_master(j, comm) * packsize;
allocate a temporary buffer tmpbuf of size bytes;
}
gather into the tmpbuf at rootmaster all send buffers
destined from processes on node i to processes on node j, they
are concatenated in order of sender rank by receiver rank;
if (myrank == rootmaster) {
/* NOTE(review): the tag here is MPI_ALLTOALL_TAG while alltoall_long
 * uses IMPI_ALLTOALL_TAG -- confirm which name is intended. */
MPI_Send(tmpbuf, size, MPI_BYTE, master_rank(j, comm),
MPI_ALLTOALL_TAG, comm);
free tmpbuf;
}
}
} else {
/* not local to the rootmaster */
if (is_master(myrank, comm)) {
/* Sized to match what the root master sends to this node:
 * nroot times this node's local process count times packsize. */
size = nroot * num_local_to_rank(myrank, comm) * packsize;
allocate a temporary buffer tmpbuf of size bytes;
MPI_Recv(tmpbuf, size, MPI_BYTE, rootmaster,
MPI_ALLTOALL_TAG, comm, &status);
}
scatter the packed send buffers receieved from rootmaster from
the tmpbuf of the local master to the local processes;
if (is_master(myrank, comm)) {
free tmpbuf;
}
}
}
return(MPI_SUCCESS);
}