/*
 * Decide whether a scatterv should use the "short" or the "long" protocol.
 *
 * count - per-rank element counts; only read on the root
 * dtype - datatype of the elements being scattered
 * root  - rank of the scattering process in comm
 * comm  - communicator over which the scatterv takes place
 *
 * The root computes the largest packed size (count[i] * pack size of one
 * dtype element) over all ranks.  If that size exceeds 2048 bytes the
 * result is 0; otherwise the result is the size itself.  Note that a
 * largest size of 0 (all counts zero) also yields 0.
 *
 * The result is broadcast from the root, so every rank in comm must call
 * this function and every rank returns the same value.
 */
IMPI_Int4 scatterv_is_short(int *count, MPI_Datatype dtype,
                            int root, MPI_Comm comm)
{
  IMPI_Int4 maxsize = 0;
  int i, myrank, nprocs, size;

  MPI_Comm_rank(comm, &myrank);
  MPI_Comm_size(comm, &nprocs);
  MPI_Pack_size(1, dtype, comm, &size);

  if (myrank == root) {
    /* find the largest packed chunk destined for any single rank */
    maxsize = count[0] * size;
    for (i = 1; i < nprocs; i++) {
      if (count[i] * size > maxsize) {
        maxsize = count[i] * size;
      }
    }
    /*
     * Apply the threshold once, after the maximum is known.  Testing it
     * inside the loop would skip the check for a one-process communicator
     * and would let a later, smaller count overwrite the 0 marker.
     */
    if (maxsize > 2048) {
      maxsize = 0;
    }
  }

  /* collective: executed by root and non-root ranks alike */
  MPI_Bcast(&maxsize, 1, IMPI_INT4, root, comm);
  return(maxsize);
}
/*
 * Scatterv entry point: asks scatterv_is_short() which protocol applies
 * and hands all arguments to the matching implementation.
 *
 * A zero answer selects scatterv_long(); any non-zero answer selects
 * scatterv_short(), which additionally receives that value as its
 * maxsize argument.  The return values of the helpers are not inspected;
 * MPI_SUCCESS is always returned.
 */
int MPI_Scatterv(void *sbuf, int *scounts, int *disps, MPI_Datatype sdtype,
                 void *rbuf, int rcount, MPI_Datatype rdtype,
                 int root, MPI_Comm comm)
{
  IMPI_Int4 short_limit = scatterv_is_short(scounts, sdtype, root, comm);

  if (short_limit == 0) {
    /* long protocol */
    scatterv_long(sbuf, scounts, disps, sdtype,
                  rbuf, rcount, rdtype,
                  root, comm);
    return(MPI_SUCCESS);
  }

  /* short protocol */
  scatterv_short(sbuf, scounts, disps, sdtype,
                 rbuf, rcount, rdtype,
                 root, comm, short_limit);
  return(MPI_SUCCESS);
}
/*
 * Find the sum of the counts to be scattered to the processes
 * local to master number i.
 *
 * counts - per-rank element counts, indexed by rank in comm
 * i      - master number whose local processes are summed
 * comm   - communicator the ranks refer to
 *
 * Returns the sum of counts[r] over every rank r reported by
 * locals_to_master(i, comm).  The rank array obtained from
 * locals_to_master() is released with free() before returning
 * (presumably it is heap-allocated by that helper -- confirm).
 */
int sum_counts_to_master(int *counts, int i, MPI_Comm comm)
{
  int *ranks;
  int k, nranks, sum;

  nranks = num_local_to_master(i, comm);
  ranks = locals_to_master(i, comm);

  /* a separate index is used so the parameter i is not redeclared */
  sum = 0;
  for (k = 0; k < nranks; k++) {
    sum += counts[ranks[k]];
  }

  free(ranks);
  return(sum);
}
/*
 * Short-message scatterv, carried out in two phases.
 *
 * Global phase: the root sends, to each master other than the master of
 * its own node, one packed buffer holding the data for all processes
 * local to that master.  A master that is not local to the root receives
 * that buffer into a temporary buffer sized for
 * num_local_to_rank(myrank, comm) * maxsize bytes.
 *
 * Local phase: processes local to the root are served from the root's
 * sbuf; all others are served from their master's packed buffer.
 *
 * sbuf, scounts, disps, sdtype - send-side description, read on the root
 * rbuf, rcount, rdtype         - receive-side description
 * root                         - rank of the scattering process
 * comm                         - communicator of the operation
 * maxsize                      - non-zero value from scatterv_is_short(),
 *                                used as the per-process bound when
 *                                sizing a master's receive buffer
 *
 * Always returns MPI_SUCCESS.
 *
 * NOTE: the unindented prose statements below are pseudo-code steps kept
 * verbatim from the original; they still have to be implemented in C.
 */
int scatterv_short(void *sbuf, int *scounts, int *disps, MPI_Datatype sdtype,
                   void *rbuf, int rcount, MPI_Datatype rdtype,
                   int root, MPI_Comm comm, IMPI_Int4 maxsize)
{
  MPI_Status status;
  int i, myrank, nmasters, packsize, size;
  void *tmpbuf = NULL;  /* NULL means "no temporary buffer allocated" */

  MPI_Comm_rank(comm, &myrank);
  MPI_Pack_size(1, sdtype, comm, &packsize);

  /* global phase */
  if (myrank == root) {
    nmasters = num_masters(comm);
    for (i = 0; i < nmasters; i++) {
      if (i == local_master_num(root, comm)) {
        continue;  /* skip root's node */
      }
      /* bytes needed for everything destined for master i's processes */
      size = sum_counts_to_master(scounts, i, comm) * packsize;
create a temporary buffer tmpbuf of size bytes and
put into it concatenated in rank order packed copies of the data
destined for each process local to master i;
      MPI_Send(tmpbuf, size, MPI_BYTE,
               master_rank(i, comm), IMPI_SCATTERV_TAG, comm);
free tmpbuf;
    }
  }
  else if (is_master(myrank, comm) && !are_local(myrank, root, comm)) {
    /* upper bound: every local process may receive up to maxsize bytes */
    size = num_local_to_rank(myrank, comm) * maxsize;
allocate a temporary buffer tmpbuf of size bytes;
    MPI_Recv(tmpbuf, size, MPI_BYTE, root,
             IMPI_SCATTERV_TAG, comm, &status);
  }

  /* local phase */
  if (are_local(myrank, root, comm)) {
scatter data from root sbuf to local processes;
  } else {
scatter packed data from master tmpbuf to local processes;
  }

free all temporary buffers which are still allocated;
  return(MPI_SUCCESS);
}
/*
 * Long-message scatterv: the root sends each rank its portion directly.
 *
 * sbuf, scounts, disps, sdtype - send-side description; only read on the
 *                                root.  disps[i] is scaled by the extent
 *                                of sdtype to locate rank i's data.
 * rbuf, rcount, rdtype         - where and how each rank receives its data
 * root                         - rank of the scattering process
 * comm                         - communicator of the operation
 *
 * Non-root ranks post a single receive from the root and return.  The
 * root walks over all ranks, sending each one its slice; its own slice is
 * moved into rbuf with MPI_Sendrecv to itself.
 *
 * Always returns MPI_SUCCESS.
 */
int scatterv_long(void *sbuf, int *scounts, int *disps, MPI_Datatype sdtype,
                  void *rbuf, int rcount, MPI_Datatype rdtype,
                  int root, MPI_Comm comm)
{
  MPI_Status status;
  MPI_Aint extent;
  int i, myrank, nprocs;
  char *p;

  MPI_Comm_rank(comm, &myrank);
  MPI_Comm_size(comm, &nprocs);

  if (myrank != root) {
    MPI_Recv(rbuf, rcount, rdtype, root, IMPI_SCATTERV_TAG, comm, &status);
    /*
     * Receivers are done here.  They must not enter the send loop below:
     * the send-side arguments are only read on the root.
     */
    return(MPI_SUCCESS);
  }

  /* root only from here on */
  MPI_Type_extent(sdtype, &extent);
  for (i = 0; i < nprocs; i++) {
    /* start of rank i's data: displacement counted in sdtype extents */
    p = ((char *) sbuf) + (extent * disps[i]);
    if (i == myrank) {
      /* the root's own portion: send to and receive from itself */
      MPI_Sendrecv(p, scounts[i], sdtype, i, IMPI_SCATTERV_TAG,
                   rbuf, rcount, rdtype, i, IMPI_SCATTERV_TAG,
                   comm, &status);
    } else {
      /* same tag as the MPI_Recv posted by the non-root ranks above */
      MPI_Send(p, scounts[i], sdtype, i, IMPI_SCATTERV_TAG, comm);
    }
  }
  return(MPI_SUCCESS);
}