diff --git a/src/include/mpir_group.h b/src/include/mpir_group.h index c40f22fe877..1a80386ea7f 100644 --- a/src/include/mpir_group.h +++ b/src/include/mpir_group.h @@ -11,18 +11,7 @@ * only because they are required for the group operations (e.g., * MPI_Group_intersection) and for the scalable RMA synchronization *---------------------------------------------------------------------------*/ -/* This structure is used to implement the group operations such as - MPI_Group_translate_ranks */ -/* note: next_lpid (with idx_of_first_lpid in MPIR_Group) gives a linked list - * in a sorted lpid ascending order */ -typedef struct MPII_Group_pmap_t { - uint64_t lpid; /* local process id, from VCONN */ - int next_lpid; /* Index of next lpid (in lpid order) */ -} MPII_Group_pmap_t; - -/* Any changes in the MPIR_Group structure must be made to the - predefined value in MPIR_Group_builtin for MPI_GROUP_EMPTY in - src/mpi/group/grouputil.c */ + /*S MPIR_Group - Description of the Group data structure @@ -53,22 +42,32 @@ typedef struct MPII_Group_pmap_t { Group-DS S*/ + +/* Abstract the integer type for lpid (process id). It is possible to use 32-bit + * in principle, but 64-bit is simpler since we can trivially combine + * (world_idx, world_rank). 
+ */ +typedef uint64_t MPIR_Lpid; + +struct MPIR_Pmap { + int size; /* same as group->size, duplicate here so Pmap is logically complete */ + bool use_map; + union { + MPIR_Lpid *map; + struct { + MPIR_Lpid offset; + MPIR_Lpid stride; + MPIR_Lpid blocksize; + } stride; + } u; +}; + struct MPIR_Group { MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */ int size; /* Size of a group */ - int rank; /* rank of this process relative to this - * group */ - int idx_of_first_lpid; - MPII_Group_pmap_t *lrank_to_lpid; /* Array mapping a local rank to local - * process number */ - int is_local_dense_monotonic; /* see NOTE-G1 */ - - /* We may want some additional data for the RMA syncrhonization calls */ - /* Other, device-specific information */ -#ifdef MPID_DEV_GROUP_DECL - MPID_DEV_GROUP_DECL -#endif - MPIR_Session * session_ptr; /* Pointer to session to which this group belongs */ + int rank; /* rank of this process relative to this group */ + struct MPIR_Pmap pmap; + MPIR_Session *session_ptr; /* Pointer to session to which this group belongs */ }; /* NOTE-G1: is_local_dense_monotonic will be true iff the group meets the @@ -97,18 +96,21 @@ extern MPIR_Group *const MPIR_Group_empty; #define MPIR_Group_release_ref(_group, _inuse) \ do { MPIR_Object_release_ref(_group, _inuse); } while (0) -void MPII_Group_setup_lpid_list(MPIR_Group *); int MPIR_Group_check_valid_ranks(MPIR_Group *, const int[], int); int MPIR_Group_check_valid_ranges(MPIR_Group *, int[][3], int); -void MPIR_Group_setup_lpid_pairs(MPIR_Group *, MPIR_Group *); int MPIR_Group_create(int, MPIR_Group **); int MPIR_Group_release(MPIR_Group * group_ptr); +int MPIR_Group_create_map(int size, int rank, MPIR_Session * session_ptr, MPIR_Lpid * map, + MPIR_Group ** new_group_ptr); +int MPIR_Group_create_stride(int size, int rank, MPIR_Session * session_ptr, + MPIR_Lpid offset, MPIR_Lpid stride, MPIR_Lpid blocksize, + MPIR_Group ** new_group_ptr); +MPIR_Lpid MPIR_Group_rank_to_lpid(MPIR_Group * group, int rank); 
+int MPIR_Group_lpid_to_rank(MPIR_Group * group, MPIR_Lpid lpid); + int MPIR_Group_check_subset(MPIR_Group * group_ptr, MPIR_Comm * comm_ptr); void MPIR_Group_set_session_ptr(MPIR_Group * group_ptr, MPIR_Session * session_out); int MPIR_Group_init(void); -/* internal functions */ -void MPII_Group_setup_lpid_list(MPIR_Group *); - #endif /* MPIR_GROUP_H_INCLUDED */ diff --git a/src/mpi/comm/comm_impl.c b/src/mpi/comm/comm_impl.c index 9dbba6d703f..56db002f58c 100644 --- a/src/mpi/comm/comm_impl.c +++ b/src/mpi/comm/comm_impl.c @@ -68,36 +68,19 @@ int MPIR_Comm_test_threadcomm_impl(MPIR_Comm * comm_ptr, int *flag) static int comm_create_local_group(MPIR_Comm * comm_ptr) { int mpi_errno = MPI_SUCCESS; - MPIR_Group *group_ptr; - int n = comm_ptr->local_size; - - mpi_errno = MPIR_Group_create(n, &group_ptr); - MPIR_ERR_CHECK(mpi_errno); - /* Group belongs to the same session as communicator */ - MPIR_Group_set_session_ptr(group_ptr, comm_ptr->session_ptr); - - group_ptr->is_local_dense_monotonic = TRUE; + int n = comm_ptr->local_size; + MPIR_Lpid *map = MPL_malloc(n * sizeof(MPIR_Lpid), MPL_MEM_GROUP); - int comm_world_size = MPIR_Process.size; for (int i = 0; i < n; i++) { uint64_t lpid; (void) MPID_Comm_get_lpid(comm_ptr, i, &lpid, FALSE); - group_ptr->lrank_to_lpid[i].lpid = lpid; - if (lpid > comm_world_size || (i > 0 && group_ptr->lrank_to_lpid[i - 1].lpid != (lpid - 1))) { - group_ptr->is_local_dense_monotonic = FALSE; - } + map[i] = lpid; } - group_ptr->size = n; - group_ptr->rank = comm_ptr->rank; - group_ptr->idx_of_first_lpid = -1; - - comm_ptr->local_group = group_ptr; - - /* FIXME : Add a sanity check that the size of the group is the same as - * the size of the communicator. 
This helps catch corrupted - * communicators */ + mpi_errno = MPIR_Group_create_map(n, comm_ptr->rank, comm_ptr->session_ptr, map, + &comm_ptr->local_group); + MPIR_ERR_CHECK(mpi_errno); fn_exit: return mpi_errno; @@ -215,16 +198,13 @@ int MPII_Comm_create_calculate_mapping(MPIR_Group * group_ptr, * exactly the same as the ranks in comm world. */ - /* we examine the group's lpids in both the intracomm and non-comm_world cases */ - MPII_Group_setup_lpid_list(group_ptr); - /* Optimize for groups contained within MPI_COMM_WORLD. */ if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) { int wsize; subsetOfWorld = 1; wsize = MPIR_Process.size; for (i = 0; i < n; i++) { - uint64_t g_lpid = group_ptr->lrank_to_lpid[i].lpid; + MPIR_Lpid g_lpid = MPIR_Group_rank_to_lpid(group_ptr, i); /* This mapping is relative to comm world */ MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, @@ -261,7 +241,7 @@ int MPII_Comm_create_calculate_mapping(MPIR_Group * group_ptr, for (j = 0; j < comm_ptr->local_size; j++) { uint64_t comm_lpid; MPID_Comm_get_lpid(comm_ptr, j, &comm_lpid, FALSE); - if (comm_lpid == group_ptr->lrank_to_lpid[i].lpid) { + if (comm_lpid == MPIR_Group_rank_to_lpid(group_ptr, i)) { mapping[i] = j; break; } @@ -800,7 +780,7 @@ int MPIR_Intercomm_create_from_groups_impl(MPIR_Group * local_group_ptr, int loc int tag = get_tag_from_stringtag(stringtag); /* FIXME: ensure lpid is from comm_world */ - uint64_t remote_lpid = remote_group_ptr->lrank_to_lpid[remote_leader].lpid; + MPIR_Lpid remote_lpid = MPIR_Group_rank_to_lpid(remote_group_ptr, remote_leader); MPIR_Assert(remote_lpid < MPIR_Process.size); mpi_errno = MPIR_Intercomm_create_impl(local_comm, local_leader, MPIR_Process.comm_world, (int) remote_lpid, @@ -931,31 +911,23 @@ int MPIR_Comm_idup_with_info_impl(MPIR_Comm * comm_ptr, MPIR_Info * info, int MPIR_Comm_remote_group_impl(MPIR_Comm * comm_ptr, MPIR_Group ** group_ptr) { int mpi_errno = MPI_SUCCESS; - int i, n; - MPIR_FUNC_ENTER; + /* Create a group and populate it with 
the local process ids */ if (!comm_ptr->remote_group) { - n = comm_ptr->remote_size; - mpi_errno = MPIR_Group_create(n, group_ptr); - MPIR_ERR_CHECK(mpi_errno); + int n = comm_ptr->remote_size; + MPIR_Lpid *map = MPL_malloc(n * sizeof(MPIR_Lpid), MPL_MEM_GROUP); - for (i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { uint64_t lpid; (void) MPID_Comm_get_lpid(comm_ptr, i, &lpid, TRUE); - (*group_ptr)->lrank_to_lpid[i].lpid = lpid; - /* TODO calculate is_local_dense_monotonic */ + map[i] = lpid; } - (*group_ptr)->size = n; - (*group_ptr)->rank = MPI_UNDEFINED; - (*group_ptr)->idx_of_first_lpid = -1; - - MPIR_Group_set_session_ptr(*group_ptr, comm_ptr->session_ptr); - - comm_ptr->remote_group = *group_ptr; - } else { - *group_ptr = comm_ptr->remote_group; + mpi_errno = MPIR_Group_create_map(n, MPI_UNDEFINED, comm_ptr->session_ptr, map, + &comm_ptr->remote_group); + MPIR_ERR_CHECK(mpi_errno); } + *group_ptr = comm_ptr->remote_group; MPIR_Group_add_ref(comm_ptr->remote_group); fn_exit: diff --git a/src/mpi/comm/ulfm_impl.c b/src/mpi/comm/ulfm_impl.c index dfd4ad6bfcf..33edffa3d11 100644 --- a/src/mpi/comm/ulfm_impl.c +++ b/src/mpi/comm/ulfm_impl.c @@ -87,21 +87,22 @@ int MPIR_Comm_get_failed_impl(MPIR_Comm * comm_ptr, MPIR_Group ** failed_group_p /* create failed_group */ int n = utarray_len(failed_procs); + MPIR_Lpid *map = MPL_malloc(n * sizeof(MPIR_Lpid), MPL_MEM_GROUP); + MPIR_Group *new_group; - mpi_errno = MPIR_Group_create(n, &new_group); - MPIR_ERR_CHECK(mpi_errno); - new_group->rank = MPI_UNDEFINED; + int myrank = MPI_UNDEFINED; for (int i = 0; i < utarray_len(failed_procs); i++) { int *p = (int *) utarray_eltptr(failed_procs, i); - new_group->lrank_to_lpid[i].lpid = *p; + map[i] = *p; /* if calling process is part of the group, set the rank */ if (*p == MPIR_Process.rank) { - new_group->rank = i; + myrank = i; } } - new_group->size = n; - new_group->idx_of_first_lpid = -1; + + mpi_errno = MPIR_Group_create_map(n, myrank, comm_ptr->session_ptr, map, 
&new_group); + MPIR_ERR_CHECK(mpi_errno); MPIR_Group *comm_group; MPIR_Comm_group_impl(comm_ptr, &comm_group); diff --git a/src/mpi/group/group_impl.c b/src/mpi/group/group_impl.c index dbd3cd88204..fa123a70efc 100644 --- a/src/mpi/group/group_impl.c +++ b/src/mpi/group/group_impl.c @@ -18,10 +18,25 @@ int MPIR_Group_size_impl(MPIR_Group * group_ptr, int *size) return MPI_SUCCESS; } +int MPIR_Group_free_impl(MPIR_Group * group_ptr) +{ + int mpi_errno = MPI_SUCCESS; + + /* Do not free MPI_GROUP_EMPTY */ + if (group_ptr->handle != MPI_GROUP_EMPTY) { + mpi_errno = MPIR_Group_release(group_ptr); + MPIR_ERR_CHECK(mpi_errno); + } + + fn_exit: + return mpi_errno; + fn_fail: + goto fn_exit; +} + int MPIR_Group_compare_impl(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2, int *result) { int mpi_errno = MPI_SUCCESS; - int g1_idx, g2_idx, size, i; /* See if their sizes are equal */ if (group_ptr1->size != group_ptr2->size) { @@ -29,156 +44,93 @@ int MPIR_Group_compare_impl(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2, in goto fn_exit; } - /* Run through the lrank to lpid lists of each group in lpid order - * to see if the same processes are involved */ - g1_idx = group_ptr1->idx_of_first_lpid; - g2_idx = group_ptr2->idx_of_first_lpid; - /* If the lpid list hasn't been created, do it now */ - if (g1_idx < 0) { - MPII_Group_setup_lpid_list(group_ptr1); - g1_idx = group_ptr1->idx_of_first_lpid; - } - if (g2_idx < 0) { - MPII_Group_setup_lpid_list(group_ptr2); - g2_idx = group_ptr2->idx_of_first_lpid; - } - while (g1_idx >= 0 && g2_idx >= 0) { - if (group_ptr1->lrank_to_lpid[g1_idx].lpid != group_ptr2->lrank_to_lpid[g2_idx].lpid) { - *result = MPI_UNEQUAL; - goto fn_exit; + int size; + size = group_ptr1->size; + + /* See if they are identical */ + bool is_ident = true; + for (int i = 0; i < size; i++) { + if (MPIR_Group_rank_to_lpid(group_ptr1, i) != MPIR_Group_rank_to_lpid(group_ptr2, i)) { + is_ident = false; + break; } - g1_idx = 
group_ptr1->lrank_to_lpid[g1_idx].next_lpid; - g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid; } - /* See if the processes are in the same order by rank */ - size = group_ptr1->size; - for (i = 0; i < size; i++) { - if (group_ptr1->lrank_to_lpid[i].lpid != group_ptr2->lrank_to_lpid[i].lpid) { - *result = MPI_SIMILAR; - goto fn_exit; + if (is_ident) { + *result = MPI_IDENT; + goto fn_exit; + } + + /* See if they are similar */ + bool is_similar = true; + for (int i = 0; i < size; i++) { + MPIR_Lpid lpid = MPIR_Group_rank_to_lpid(group_ptr1, i); + if (MPI_UNDEFINED == MPIR_Group_lpid_to_rank(group_ptr2, lpid)) { + /* not found */ + is_similar = false; + break; } } - /* If we reach here, the groups are identical */ - *result = MPI_IDENT; + if (is_similar) { + *result = MPI_SIMILAR; + } else { + *result = MPI_UNEQUAL; + } fn_exit: return mpi_errno; } -int MPIR_Group_difference_impl(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2, - MPIR_Group ** new_group_ptr) +int MPIR_Group_translate_ranks_impl(MPIR_Group * gp1, int n, const int ranks1[], + MPIR_Group * gp2, int ranks2[]) { int mpi_errno = MPI_SUCCESS; - int size1, i, k, g1_idx, g2_idx, nnew; - uint64_t l1_pid, l2_pid; - int *flags = NULL; - MPIR_FUNC_ENTER; - /* Return a group consisting of the members of group1 that are *not* - * in group2 */ - size1 = group_ptr1->size; - /* Insure that the lpid lists are setup */ - MPIR_Group_setup_lpid_pairs(group_ptr1, group_ptr2); - - flags = MPL_calloc(size1, sizeof(int), MPL_MEM_OTHER); - - g1_idx = group_ptr1->idx_of_first_lpid; - g2_idx = group_ptr2->idx_of_first_lpid; - - nnew = size1; - while (g1_idx >= 0 && g2_idx >= 0) { - l1_pid = group_ptr1->lrank_to_lpid[g1_idx].lpid; - l2_pid = group_ptr2->lrank_to_lpid[g2_idx].lpid; - if (l1_pid < l2_pid) { - g1_idx = group_ptr1->lrank_to_lpid[g1_idx].next_lpid; - } else if (l1_pid > l2_pid) { - g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid; - } else { - /* Equal */ - flags[g1_idx] = 1; - g1_idx = 
group_ptr1->lrank_to_lpid[g1_idx].next_lpid; - g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid; - nnew--; - } - } - /* Create the group */ - if (nnew == 0) { - /* See 5.3.2, Group Constructors. For many group routines, - * the standard explicitly says to return MPI_GROUP_EMPTY; - * for others it is implied */ - *new_group_ptr = MPIR_Group_empty; - goto fn_exit; - } else { - mpi_errno = MPIR_Group_create(nnew, new_group_ptr); - /* --BEGIN ERROR HANDLING-- */ - if (mpi_errno) { - goto fn_fail; - } - /* --END ERROR HANDLING-- */ - (*new_group_ptr)->rank = MPI_UNDEFINED; - k = 0; - for (i = 0; i < size1; i++) { - if (!flags[i]) { - (*new_group_ptr)->lrank_to_lpid[k].lpid = group_ptr1->lrank_to_lpid[i].lpid; - if (i == group_ptr1->rank) - (*new_group_ptr)->rank = k; - k++; - } + for (int i = 0; i < n; i++) { + if (ranks1[i] == MPI_PROC_NULL) { + ranks2[i] = MPI_PROC_NULL; + continue; } - /* TODO calculate is_local_dense_monotonic */ + MPIR_Lpid lpid = MPIR_Group_rank_to_lpid(gp1, ranks1[i]); + ranks2[i] = MPIR_Group_lpid_to_rank(gp2, lpid); } - MPIR_Group_set_session_ptr(*new_group_ptr, group_ptr1->session_ptr); - - fn_exit: - MPL_free(flags); - MPIR_FUNC_EXIT; return mpi_errno; - fn_fail: - goto fn_exit; } int MPIR_Group_excl_impl(MPIR_Group * group_ptr, int n, const int ranks[], MPIR_Group ** new_group_ptr) { int mpi_errno = MPI_SUCCESS; - int size, i, newi; - int *flags = NULL; - MPIR_FUNC_ENTER; - size = group_ptr->size; - - /* Allocate a new group and lrank_to_lpid array */ - mpi_errno = MPIR_Group_create(size - n, new_group_ptr); - MPIR_ERR_CHECK(mpi_errno); + int size = group_ptr->size; + int nnew = size - n; - (*new_group_ptr)->rank = MPI_UNDEFINED; /* Use flag fields to mark the members to *exclude* . 
*/ - - flags = MPL_calloc(size, sizeof(int), MPL_MEM_OTHER); - - for (i = 0; i < n; i++) { + int *flags = MPL_calloc(size, sizeof(int), MPL_MEM_OTHER); + for (int i = 0; i < n; i++) { flags[ranks[i]] = 1; } - newi = 0; - for (i = 0; i < size; i++) { + MPIR_Lpid *map = MPL_malloc(nnew * sizeof(MPIR_Lpid), MPL_MEM_GROUP); + MPIR_ERR_CHKANDJUMP(!map, mpi_errno, MPI_ERR_OTHER, "**nomem"); + + int myrank = MPI_UNDEFINED; + int newi = 0; + for (int i = 0; i < size; i++) { if (flags[i] == 0) { - (*new_group_ptr)->lrank_to_lpid[newi].lpid = group_ptr->lrank_to_lpid[i].lpid; - if (group_ptr->rank == i) - (*new_group_ptr)->rank = newi; + map[newi] = MPIR_Group_rank_to_lpid(group_ptr, i); + if (group_ptr->rank == i) { + myrank = newi; + } newi++; } } - (*new_group_ptr)->size = size - n; - (*new_group_ptr)->idx_of_first_lpid = -1; - /* TODO calculate is_local_dense_monotonic */ - - MPIR_Group_set_session_ptr(*new_group_ptr, group_ptr->session_ptr); + mpi_errno = MPIR_Group_create_map(nnew, myrank, group_ptr->session_ptr, map, new_group_ptr); + MPIR_ERR_CHECK(mpi_errno); fn_exit: MPL_free(flags); @@ -188,28 +140,10 @@ int MPIR_Group_excl_impl(MPIR_Group * group_ptr, int n, const int ranks[], goto fn_exit; } -int MPIR_Group_free_impl(MPIR_Group * group_ptr) -{ - int mpi_errno = MPI_SUCCESS; - - /* Do not free MPI_GROUP_EMPTY */ - if (group_ptr->handle != MPI_GROUP_EMPTY) { - mpi_errno = MPIR_Group_release(group_ptr); - MPIR_ERR_CHECK(mpi_errno); - } - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - int MPIR_Group_incl_impl(MPIR_Group * group_ptr, int n, const int ranks[], MPIR_Group ** new_group_ptr) { int mpi_errno = MPI_SUCCESS; - int i; - MPIR_FUNC_ENTER; if (n == 0) { @@ -217,98 +151,22 @@ int MPIR_Group_incl_impl(MPIR_Group * group_ptr, int n, const int ranks[], goto fn_exit; } - /* Allocate a new group and lrank_to_lpid array */ - mpi_errno = MPIR_Group_create(n, new_group_ptr); - if (mpi_errno) - goto fn_fail; - - (*new_group_ptr)->rank = MPI_UNDEFINED; - 
for (i = 0; i < n; i++) { - (*new_group_ptr)->lrank_to_lpid[i].lpid = group_ptr->lrank_to_lpid[ranks[i]].lpid; - if (ranks[i] == group_ptr->rank) - (*new_group_ptr)->rank = i; - } - (*new_group_ptr)->size = n; - (*new_group_ptr)->idx_of_first_lpid = -1; - /* TODO calculate is_local_dense_monotonic */ - - MPIR_Group_set_session_ptr(*new_group_ptr, group_ptr->session_ptr); - + int nnew = n; + MPIR_Lpid *map = MPL_malloc(nnew * sizeof(MPIR_Lpid), MPL_MEM_GROUP); + MPIR_ERR_CHKANDJUMP(!map, mpi_errno, MPI_ERR_OTHER, "**nomem"); - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPIR_Group_intersection_impl(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2, - MPIR_Group ** new_group_ptr) -{ - int mpi_errno = MPI_SUCCESS; - int size1, i, k, g1_idx, g2_idx, nnew; - uint64_t l1_pid, l2_pid; - int *flags = NULL; - - MPIR_FUNC_ENTER; - /* Return a group consisting of the members of group1 that are - * in group2 */ - size1 = group_ptr1->size; - /* Insure that the lpid lists are setup */ - MPIR_Group_setup_lpid_pairs(group_ptr1, group_ptr2); - - flags = MPL_calloc(size1, sizeof(int), MPL_MEM_OTHER); - - g1_idx = group_ptr1->idx_of_first_lpid; - g2_idx = group_ptr2->idx_of_first_lpid; - - nnew = 0; - while (g1_idx >= 0 && g2_idx >= 0) { - l1_pid = group_ptr1->lrank_to_lpid[g1_idx].lpid; - l2_pid = group_ptr2->lrank_to_lpid[g2_idx].lpid; - if (l1_pid < l2_pid) { - g1_idx = group_ptr1->lrank_to_lpid[g1_idx].next_lpid; - } else if (l1_pid > l2_pid) { - g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid; - } else { - /* Equal */ - flags[g1_idx] = 1; - g1_idx = group_ptr1->lrank_to_lpid[g1_idx].next_lpid; - g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid; - nnew++; + int myrank = MPI_UNDEFINED; + for (int i = 0; i < n; i++) { + map[i] = MPIR_Group_rank_to_lpid(group_ptr, ranks[i]); + if (ranks[i] == group_ptr->rank) { + myrank = i; } } - /* Create the group. 
Handle the trivial case first */ - if (nnew == 0) { - *new_group_ptr = MPIR_Group_empty; - goto fn_exit; - } - mpi_errno = MPIR_Group_create(nnew, new_group_ptr); + mpi_errno = MPIR_Group_create_map(nnew, myrank, group_ptr->session_ptr, map, new_group_ptr); MPIR_ERR_CHECK(mpi_errno); - (*new_group_ptr)->rank = MPI_UNDEFINED; - (*new_group_ptr)->is_local_dense_monotonic = TRUE; - k = 0; - for (i = 0; i < size1; i++) { - if (flags[i]) { - uint64_t lpid = group_ptr1->lrank_to_lpid[i].lpid; - (*new_group_ptr)->lrank_to_lpid[k].lpid = lpid; - if (i == group_ptr1->rank) - (*new_group_ptr)->rank = k; - if (lpid > MPIR_Process.size || - (k > 0 && (*new_group_ptr)->lrank_to_lpid[k - 1].lpid != (lpid - 1))) { - (*new_group_ptr)->is_local_dense_monotonic = FALSE; - } - - k++; - } - } - - MPIR_Group_set_session_ptr(*new_group_ptr, group_ptr1->session_ptr); - fn_exit: - MPL_free(flags); MPIR_FUNC_EXIT; return mpi_errno; fn_fail: @@ -319,17 +177,15 @@ int MPIR_Group_range_excl_impl(MPIR_Group * group_ptr, int n, int ranges[][3], MPIR_Group ** new_group_ptr) { int mpi_errno = MPI_SUCCESS; - int size, i, j, k, nnew, first, last, stride; - int *flags = NULL; - MPIR_FUNC_ENTER; + /* Compute size, assuming that included ranks are valid (and distinct) */ - size = group_ptr->size; - nnew = 0; - for (i = 0; i < n; i++) { - first = ranges[i][0]; - last = ranges[i][1]; - stride = ranges[i][2]; + int size = group_ptr->size; + int nnew = 0; + for (int i = 0; i < n; i++) { + int first = ranges[i][0]; + int last = ranges[i][1]; + int stride = ranges[i][2]; /* works for stride of either sign. 
Error checking above * has already guaranteed stride != 0 */ nnew += 1 + (last - first) / stride; @@ -341,15 +197,6 @@ int MPIR_Group_range_excl_impl(MPIR_Group * group_ptr, int n, int ranges[][3], goto fn_exit; } - /* Allocate a new group and lrank_to_lpid array */ - mpi_errno = MPIR_Group_create(nnew, new_group_ptr); - /* --BEGIN ERROR HANDLING-- */ - if (mpi_errno) { - goto fn_fail; - } - /* --END ERROR HANDLING-- */ - (*new_group_ptr)->rank = MPI_UNDEFINED; - /* Group members are taken in rank order from the original group, * with the specified members removed. Use the flag array for that * purpose. If this was a critical routine, we could use the @@ -357,41 +204,46 @@ int MPIR_Group_range_excl_impl(MPIR_Group * group_ptr, int n, int ranges[][3], * was enabled *and* we are not MPI_THREAD_MULTIPLE, but since this * is a low-usage routine, we haven't taken that optimization. */ - flags = MPL_calloc(size, sizeof(int), MPL_MEM_OTHER); + int *flags = MPL_calloc(size, sizeof(int), MPL_MEM_OTHER); - for (i = 0; i < n; i++) { - first = ranges[i][0]; - last = ranges[i][1]; - stride = ranges[i][2]; + for (int i = 0; i < n; i++) { + int first = ranges[i][0]; + int last = ranges[i][1]; + int stride = ranges[i][2]; if (stride > 0) { - for (j = first; j <= last; j += stride) { + for (int j = first; j <= last; j += stride) { flags[j] = 1; } } else { - for (j = first; j >= last; j += stride) { + for (int j = first; j >= last; j += stride) { flags[j] = 1; } } } + /* Now, run through the group and pick up the members that were * not excluded */ - k = 0; - for (i = 0; i < size; i++) { + MPIR_Lpid *map = MPL_malloc(nnew * sizeof(MPIR_Lpid), MPL_MEM_GROUP); + MPIR_ERR_CHKANDJUMP(!map, mpi_errno, MPI_ERR_OTHER, "**nomem"); + + int myrank = MPI_UNDEFINED; + int k = 0; + for (int i = 0; i < size; i++) { if (!flags[i]) { - (*new_group_ptr)->lrank_to_lpid[k].lpid = group_ptr->lrank_to_lpid[i].lpid; + map[k] = MPIR_Group_rank_to_lpid(group_ptr, i); if (group_ptr->rank == i) { - 
(*new_group_ptr)->rank = k; + myrank = k; } k++; } } - /* TODO calculate is_local_dense_monotonic */ + MPL_free(flags); - MPIR_Group_set_session_ptr(*new_group_ptr, group_ptr->session_ptr); + mpi_errno = MPIR_Group_create_map(nnew, myrank, group_ptr->session_ptr, map, new_group_ptr); + MPIR_ERR_CHECK(mpi_errno); fn_exit: - MPL_free(flags); MPIR_FUNC_EXIT; return mpi_errno; fn_fail: @@ -402,16 +254,14 @@ int MPIR_Group_range_incl_impl(MPIR_Group * group_ptr, int n, int ranges[][3], MPIR_Group ** new_group_ptr) { int mpi_errno = MPI_SUCCESS; - int first, last, stride, nnew, i, j, k; - MPIR_FUNC_ENTER; /* Compute size, assuming that included ranks are valid (and distinct) */ - nnew = 0; - for (i = 0; i < n; i++) { - first = ranges[i][0]; - last = ranges[i][1]; - stride = ranges[i][2]; + int nnew = 0; + for (int i = 0; i < n; i++) { + int first = ranges[i][0]; + int last = ranges[i][1]; + int stride = ranges[i][2]; /* works for stride of either sign. Error checking above * has already guaranteed stride != 0 */ nnew += 1 + (last - first) / stride; @@ -422,40 +272,39 @@ int MPIR_Group_range_incl_impl(MPIR_Group * group_ptr, int n, int ranges[][3], goto fn_exit; } - /* Allocate a new group and lrank_to_lpid array */ - mpi_errno = MPIR_Group_create(nnew, new_group_ptr); - if (mpi_errno) - goto fn_fail; - (*new_group_ptr)->rank = MPI_UNDEFINED; + MPIR_Lpid *map = MPL_malloc(nnew * sizeof(MPIR_Lpid), MPL_MEM_GROUP); + MPIR_ERR_CHKANDJUMP(!map, mpi_errno, MPI_ERR_OTHER, "**nomem"); /* Group members taken in order specified by the range array */ /* This could be integrated with the error checking, but since this * is a low-usage routine, we haven't taken that optimization */ - k = 0; - for (i = 0; i < n; i++) { - first = ranges[i][0]; - last = ranges[i][1]; - stride = ranges[i][2]; + int myrank = MPI_UNDEFINED; + int k = 0; + for (int i = 0; i < n; i++) { + int first = ranges[i][0]; + int last = ranges[i][1]; + int stride = ranges[i][2]; if (stride > 0) { - for (j = first; j 
<= last; j += stride) { - (*new_group_ptr)->lrank_to_lpid[k].lpid = group_ptr->lrank_to_lpid[j].lpid; - if (j == group_ptr->rank) - (*new_group_ptr)->rank = k; + for (int j = first; j <= last; j += stride) { + map[k] = MPIR_Group_rank_to_lpid(group_ptr, j); + if (j == group_ptr->rank) { + myrank = k; + } k++; } } else { - for (j = first; j >= last; j += stride) { - (*new_group_ptr)->lrank_to_lpid[k].lpid = group_ptr->lrank_to_lpid[j].lpid; - if (j == group_ptr->rank) - (*new_group_ptr)->rank = k; + for (int j = first; j >= last; j += stride) { + map[k] = MPIR_Group_rank_to_lpid(group_ptr, j); + if (j == group_ptr->rank) { + myrank = k; + } k++; } } } - /* TODO calculate is_local_dense_monotonic */ - - MPIR_Group_set_session_ptr(*new_group_ptr, group_ptr->session_ptr); + mpi_errno = MPIR_Group_create_map(nnew, myrank, group_ptr->session_ptr, map, new_group_ptr); + MPIR_ERR_CHECK(mpi_errno); fn_exit: MPIR_FUNC_EXIT; @@ -464,180 +313,119 @@ int MPIR_Group_range_incl_impl(MPIR_Group * group_ptr, int n, int ranges[][3], goto fn_exit; } -int MPIR_Group_translate_ranks_impl(MPIR_Group * gp1, int n, const int ranks1[], - MPIR_Group * gp2, int ranks2[]) +int MPIR_Group_difference_impl(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2, + MPIR_Group ** new_group_ptr) { int mpi_errno = MPI_SUCCESS; - int i, g2_idx; - uint64_t l1_pid, l2_pid; - - MPL_DBG_MSG_S(MPIR_DBG_OTHER, VERBOSE, "gp2->is_local_dense_monotonic=%s", - (gp2->is_local_dense_monotonic ? 
"TRUE" : "FALSE")); - - /* Initialize the output ranks */ - for (i = 0; i < n; i++) - ranks2[i] = MPI_UNDEFINED; + MPIR_FUNC_ENTER; - if (gp2->size > 0 && gp2->is_local_dense_monotonic) { - /* g2 probably == group_of(MPI_COMM_WORLD); use fast, constant-time lookup */ - uint64_t lpid_offset = gp2->lrank_to_lpid[0].lpid; + MPIR_Assert(group_ptr1->session_ptr == group_ptr2->session_ptr); - for (i = 0; i < n; ++i) { - uint64_t g1_lpid; + MPIR_Lpid *map = MPL_malloc(group_ptr1->size * sizeof(MPIR_Lpid), MPL_MEM_GROUP); + MPIR_ERR_CHKANDJUMP(!map, mpi_errno, MPI_ERR_OTHER, "**nomem"); - if (ranks1[i] == MPI_PROC_NULL) { - ranks2[i] = MPI_PROC_NULL; - continue; - } - /* "adjusted" lpid from g1 */ - g1_lpid = gp1->lrank_to_lpid[ranks1[i]].lpid - lpid_offset; - if (g1_lpid < gp2->size) { - ranks2[i] = g1_lpid; - } - /* else leave UNDEFINED */ - } - } else { - /* general, slow path; lookup time is dependent on the user-provided rank values! */ - g2_idx = gp2->idx_of_first_lpid; - if (g2_idx < 0) { - MPII_Group_setup_lpid_list(gp2); - g2_idx = gp2->idx_of_first_lpid; - } - if (g2_idx >= 0) { - /* g2_idx can be < 0 if the g2 group is empty */ - l2_pid = gp2->lrank_to_lpid[g2_idx].lpid; - for (i = 0; i < n; i++) { - if (ranks1[i] == MPI_PROC_NULL) { - ranks2[i] = MPI_PROC_NULL; - continue; - } - l1_pid = gp1->lrank_to_lpid[ranks1[i]].lpid; - /* Search for this l1_pid in group2. Use the following - * optimization: start from the last position in the lpid list - * if possible. A more sophisticated version could use a - * tree based or even hashed search to speed the translation. 
*/ - if (l1_pid < l2_pid || g2_idx < 0) { - /* Start over from the beginning */ - g2_idx = gp2->idx_of_first_lpid; - l2_pid = gp2->lrank_to_lpid[g2_idx].lpid; - } - while (g2_idx >= 0 && l1_pid > l2_pid) { - g2_idx = gp2->lrank_to_lpid[g2_idx].next_lpid; - if (g2_idx >= 0) - l2_pid = gp2->lrank_to_lpid[g2_idx].lpid; - else - l2_pid = (uint64_t) - 1; - } - if (l1_pid == l2_pid) - ranks2[i] = g2_idx; + int nnew = 0; + int myrank = MPI_UNDEFINED; + /* For each rank in group1, search it in group2. */ + for (int i = 0; i < group_ptr1->size; i++) { + MPIR_Lpid lpid = MPIR_Group_rank_to_lpid(group_ptr1, i); + if (MPI_UNDEFINED == MPIR_Group_lpid_to_rank(group_ptr2, lpid)) { + /* not found */ + if (i == group_ptr1->rank) { + myrank = nnew; } + map[nnew++] = lpid; } } + + /* Create the group */ + mpi_errno = MPIR_Group_create_map(nnew, myrank, group_ptr1->session_ptr, map, new_group_ptr); + MPIR_ERR_CHECK(mpi_errno); + + fn_exit: + MPIR_FUNC_EXIT; return mpi_errno; + fn_fail: + goto fn_exit; } -int MPIR_Group_union_impl(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2, - MPIR_Group ** new_group_ptr) +int MPIR_Group_intersection_impl(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2, + MPIR_Group ** new_group_ptr) { int mpi_errno = MPI_SUCCESS; - int g1_idx, g2_idx, nnew, i, k, size1, size2; - uint64_t mylpid; - int *flags = NULL; - MPIR_FUNC_ENTER; - /* Determine the size of the new group. The new group consists of all - * members of group1 plus the members of group2 that are not in group1. 
- */ - g1_idx = group_ptr1->idx_of_first_lpid; - g2_idx = group_ptr2->idx_of_first_lpid; + /* Similar to MPI_Group_difference, but take the ranks that are found in group2 */ - /* If the lpid list hasn't been created, do it now */ - if (g1_idx < 0) { - MPII_Group_setup_lpid_list(group_ptr1); - g1_idx = group_ptr1->idx_of_first_lpid; - } - if (g2_idx < 0) { - MPII_Group_setup_lpid_list(group_ptr2); - g2_idx = group_ptr2->idx_of_first_lpid; - } - nnew = group_ptr1->size; - - /* Clear the flag bits on the second group. The flag is set if - * a member of the second group belongs to the union */ - size2 = group_ptr2->size; - flags = MPL_calloc(size2, sizeof(int), MPL_MEM_OTHER); - - /* Loop through the lists that are ordered by lpid (local process - * id) to detect which processes in group 2 are not in group 1 - */ - while (g1_idx >= 0 && g2_idx >= 0) { - uint64_t l1_pid, l2_pid; - l1_pid = group_ptr1->lrank_to_lpid[g1_idx].lpid; - l2_pid = group_ptr2->lrank_to_lpid[g2_idx].lpid; - if (l1_pid > l2_pid) { - nnew++; - flags[g2_idx] = 1; - g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid; - } else if (l1_pid == l2_pid) { - g1_idx = group_ptr1->lrank_to_lpid[g1_idx].next_lpid; - g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid; - } else { - /* l1 < l2 */ - g1_idx = group_ptr1->lrank_to_lpid[g1_idx].next_lpid; - } - } - /* If we hit the end of group1, add the remaining members of group 2 */ - while (g2_idx >= 0) { - nnew++; - flags[g2_idx] = 1; - g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid; - } + MPIR_Assert(group_ptr1->session_ptr == group_ptr2->session_ptr); - if (nnew == 0) { - *new_group_ptr = MPIR_Group_empty; - goto fn_exit; + MPIR_Lpid *map = MPL_malloc(group_ptr1->size * sizeof(MPIR_Lpid), MPL_MEM_GROUP); + MPIR_ERR_CHKANDJUMP(!map, mpi_errno, MPI_ERR_OTHER, "**nomem"); + + int nnew = 0; + int myrank = MPI_UNDEFINED; + /* For each rank in group1, search it in group2. 
*/ + for (int i = 0; i < group_ptr1->size; i++) { + MPIR_Lpid lpid = MPIR_Group_rank_to_lpid(group_ptr1, i); + if (MPI_UNDEFINED != MPIR_Group_lpid_to_rank(group_ptr2, lpid)) { + /* found */ + if (i == group_ptr1->rank) { + myrank = nnew; + } + map[nnew++] = lpid; + } } - /* Allocate a new group and lrank_to_lpid array */ - mpi_errno = MPIR_Group_create(nnew, new_group_ptr); + /* Create the group */ + mpi_errno = MPIR_Group_create_map(nnew, myrank, group_ptr1->session_ptr, map, new_group_ptr); MPIR_ERR_CHECK(mpi_errno); + fn_exit: + MPIR_FUNC_EXIT; + return mpi_errno; + fn_fail: + goto fn_exit; +} + +int MPIR_Group_union_impl(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2, + MPIR_Group ** new_group_ptr) +{ + int mpi_errno = MPI_SUCCESS; + MPIR_FUNC_ENTER; + + MPIR_Assert(group_ptr1->session_ptr == group_ptr2->session_ptr); + + MPIR_Lpid *map = MPL_malloc((group_ptr1->size + group_ptr2->size) * sizeof(MPIR_Lpid), + MPL_MEM_GROUP); + MPIR_ERR_CHKANDJUMP(!map, mpi_errno, MPI_ERR_OTHER, "**nomem"); + /* If this process is in group1, then we can set the rank now. 
* If we are not in this group, this assignment will set the * current rank to MPI_UNDEFINED */ - (*new_group_ptr)->rank = group_ptr1->rank; + int myrank = group_ptr1->rank; /* Add group1 */ - size1 = group_ptr1->size; - for (i = 0; i < size1; i++) { - (*new_group_ptr)->lrank_to_lpid[i].lpid = group_ptr1->lrank_to_lpid[i].lpid; + for (int i = 0; i < group_ptr1->size; i++) { + map[i] = MPIR_Group_rank_to_lpid(group_ptr1, i); } /* Add members of group2 that are not in group 1 */ - - if (group_ptr1->rank == MPI_UNDEFINED && group_ptr2->rank >= 0) { - mylpid = group_ptr2->lrank_to_lpid[group_ptr2->rank].lpid; - } else { - mylpid = (uint64_t) - 2; - } - k = size1; - for (i = 0; i < size2; i++) { - if (flags[i]) { - (*new_group_ptr)->lrank_to_lpid[k].lpid = group_ptr2->lrank_to_lpid[i].lpid; - if ((*new_group_ptr)->rank == MPI_UNDEFINED && - group_ptr2->lrank_to_lpid[i].lpid == mylpid) - (*new_group_ptr)->rank = k; - k++; + int nnew = group_ptr1->size; + for (int i = 0; i < group_ptr2->size; i++) { + MPIR_Lpid lpid = MPIR_Group_rank_to_lpid(group_ptr2, i); + if (MPI_UNDEFINED == MPIR_Group_lpid_to_rank(group_ptr1, lpid)) { + /* not found */ + if (i == group_ptr2->rank) { + myrank = nnew; + } + map[nnew++] = lpid; } } - /* TODO calculate is_local_dense_monotonic */ - - MPIR_Group_set_session_ptr(*new_group_ptr, group_ptr1->session_ptr); + mpi_errno = MPIR_Group_create_map(nnew, myrank, group_ptr1->session_ptr, map, new_group_ptr); + MPIR_ERR_CHECK(mpi_errno); fn_exit: - MPL_free(flags); MPIR_FUNC_EXIT; return mpi_errno; fn_fail: @@ -648,40 +436,19 @@ int MPIR_Group_from_session_pset_impl(MPIR_Session * session_ptr, const char *ps MPIR_Group ** new_group_ptr) { int mpi_errno = MPI_SUCCESS; - MPIR_Group *group_ptr; if (MPL_stricmp(pset_name, "mpi://WORLD") == 0) { - mpi_errno = MPIR_Group_create(MPIR_Process.size, &group_ptr); + mpi_errno = MPIR_Group_create_stride(MPIR_Process.size, MPIR_Process.rank, session_ptr, + 0, 1, 1, new_group_ptr); MPIR_ERR_CHECK(mpi_errno); - - 
group_ptr->size = MPIR_Process.size; - group_ptr->rank = MPIR_Process.rank; - group_ptr->is_local_dense_monotonic = TRUE; - for (int i = 0; i < group_ptr->size; i++) { - group_ptr->lrank_to_lpid[i].lpid = i; - group_ptr->lrank_to_lpid[i].next_lpid = i + 1; - } - group_ptr->lrank_to_lpid[group_ptr->size - 1].next_lpid = -1; - group_ptr->idx_of_first_lpid = 0; } else if (MPL_stricmp(pset_name, "mpi://SELF") == 0) { - mpi_errno = MPIR_Group_create(1, &group_ptr); + mpi_errno = MPIR_Group_create_stride(1, 0, session_ptr, MPIR_Process.rank, 1, 1, new_group_ptr); /* SELF contains only this process, so the stride offset is our own lpid */ MPIR_ERR_CHECK(mpi_errno); - - group_ptr->size = 1; - group_ptr->rank = 0; - group_ptr->is_local_dense_monotonic = TRUE; - group_ptr->lrank_to_lpid[0].lpid = MPIR_Process.rank; - group_ptr->lrank_to_lpid[0].next_lpid = -1; - group_ptr->idx_of_first_lpid = 0; } else { /* TODO: Implement pset struct, locate pset struct ptr */ MPIR_ERR_SETANDSTMT(mpi_errno, MPI_ERR_ARG, goto fn_fail, "**psetinvalidname"); } - MPIR_Group_set_session_ptr(group_ptr, session_ptr); - - *new_group_ptr = group_ptr; - fn_exit: return mpi_errno; fn_fail: diff --git a/src/mpi/group/groupdebug.c b/src/mpi/group/groupdebug.c deleted file mode 100644 index a70b9592d2f..00000000000 --- a/src/mpi/group/groupdebug.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" -#include "group.h" - -/* style: allow:fprintf:2 sig:0 */ -/* style: PMPIuse:PMPI_Abort:2 sig:0 */ - -/* - * This file contains routines that are used only to perform testing - * and debugging of the group routines - */ -void MPITEST_Group_create(int, int, MPI_Group *); -void MPITEST_Group_print(MPI_Group); - -/* --BEGIN DEBUG-- */ -void MPITEST_Group_create(int nproc, int myrank, MPI_Group * new_group) -{ - MPIR_Group *new_group_ptr; - int i; - - new_group_ptr = (MPIR_Group *) MPIR_Handle_obj_alloc(&MPIR_Group_mem); - if (!new_group_ptr) { - fprintf(stderr, "Could not create a new group\n"); - 
PMPI_Abort(MPI_COMM_WORLD, 1); - } - MPIR_Object_set_ref(new_group_ptr, 1); - new_group_ptr->lrank_to_lpid = - (MPII_Group_pmap_t *) MPL_malloc(nproc * sizeof(MPII_Group_pmap_t), MPL_MEM_DEBUG); - if (!new_group_ptr->lrank_to_lpid) { - fprintf(stderr, "Could not create lrank map for new group\n"); - PMPI_Abort(MPI_COMM_WORLD, 1); - } - - new_group_ptr->rank = MPI_UNDEFINED; - for (i = 0; i < nproc; i++) { - new_group_ptr->lrank_to_lpid[i].lrank = i; - new_group_ptr->lrank_to_lpid[i].lpid = i; - } - new_group_ptr->size = nproc; - new_group_ptr->rank = myrank; - new_group_ptr->idx_of_first_lpid = -1; - - *new_group = new_group_ptr->handle; -} - -void MPITEST_Group_print(MPI_Group g) -{ - MPIR_Group *g_ptr; - int g_idx, size, i; - - MPIR_Group_get_ptr(g, g_ptr); - - g_idx = g_ptr->idx_of_first_lpid; - if (g_idx < 0) { - MPII_Group_setup_lpid_list(g_ptr); - g_idx = g_ptr->idx_of_first_lpid; - } - - /* Loop through these, printing the lpids by rank and in order */ - size = g_ptr->size; - fprintf(stdout, "Lpids in rank order\n"); - for (i = 0; i < size; i++) { - fprintf(stdout, "Rank %d has lpid %d\n", i, g_ptr->lrank_to_lpid[i].lpid); - } - - fprintf(stdout, "Ranks in lpid order\n"); - while (g_idx >= 0) { - fprintf(stdout, "Rank %d has lpid %d\n", g_idx, g_ptr->lrank_to_lpid[g_idx].lpid); - g_idx = g_ptr->lrank_to_lpid[g_idx].next_lpid; - } -} - -/* --END DEBUG-- */ diff --git a/src/mpi/group/grouputil.c b/src/mpi/group/grouputil.c index ac777e50305..59c45561eca 100644 --- a/src/mpi/group/grouputil.c +++ b/src/mpi/group/grouputil.c @@ -28,10 +28,9 @@ int MPIR_Group_init(void) MPIR_Object_set_ref(&MPIR_Group_builtin[0], 1); MPIR_Group_builtin[0].size = 0; MPIR_Group_builtin[0].rank = MPI_UNDEFINED; - MPIR_Group_builtin[0].idx_of_first_lpid = -1; - MPIR_Group_builtin[0].lrank_to_lpid = NULL; + MPIR_Group_builtin[0].session_ptr = NULL; + memset(&MPIR_Group_builtin[0].pmap, 0, sizeof(struct MPIR_Pmap)); - /* TODO hook for device here? 
*/ return mpi_errno; } @@ -44,7 +43,9 @@ int MPIR_Group_release(MPIR_Group * group_ptr) MPIR_Group_release_ref(group_ptr, &inuse); if (!inuse) { /* Only if refcount is 0 do we actually free. */ - MPL_free(group_ptr->lrank_to_lpid); + if (group_ptr->pmap.use_map) { + MPL_free(group_ptr->pmap.u.map); + } if (group_ptr->session_ptr != NULL) { /* Release session */ MPIR_Session_release(group_ptr->session_ptr); @@ -73,151 +74,137 @@ int MPIR_Group_create(int nproc, MPIR_Group ** new_group_ptr) } /* --END ERROR HANDLING-- */ MPIR_Object_set_ref(*new_group_ptr, 1); - (*new_group_ptr)->lrank_to_lpid = - (MPII_Group_pmap_t *) MPL_calloc(nproc, sizeof(MPII_Group_pmap_t), MPL_MEM_GROUP); - /* --BEGIN ERROR HANDLING-- */ - if (!(*new_group_ptr)->lrank_to_lpid) { - MPIR_Handle_obj_free(&MPIR_Group_mem, *new_group_ptr); - *new_group_ptr = NULL; - MPIR_CHKMEM_SETERR(mpi_errno, nproc * sizeof(MPII_Group_pmap_t), "newgroup->lrank_to_lpid"); - return mpi_errno; - } - /* --END ERROR HANDLING-- */ + + /* initialize fields */ (*new_group_ptr)->size = nproc; - /* Make sure that there is no question that the list of ranks sorted - * by pids is marked as uninitialized */ - (*new_group_ptr)->idx_of_first_lpid = -1; + (*new_group_ptr)->rank = MPI_UNDEFINED; + (*new_group_ptr)->session_ptr = NULL; + memset(&(*new_group_ptr)->pmap, 0, sizeof(struct MPIR_Pmap)); + (*new_group_ptr)->pmap.size = nproc; + + return mpi_errno; +} + +int MPIR_Group_create_map(int size, int rank, MPIR_Session * session_ptr, MPIR_Lpid * map, + MPIR_Group ** new_group_ptr) +{ + int mpi_errno = MPI_SUCCESS; + + if (size == 0) { + /* See 5.3.2, Group Constructors. 
For many group routines, + * the standard explicitly says to return MPI_GROUP_EMPTY; + * for others it is implied */ + MPL_free(map); + *new_group_ptr = MPIR_Group_empty; + goto fn_exit; + } else { + MPIR_Group *newgrp; + mpi_errno = MPIR_Group_create(size, &newgrp); + MPIR_ERR_CHECK(mpi_errno); - (*new_group_ptr)->is_local_dense_monotonic = FALSE; + newgrp->rank = rank; + MPIR_Group_set_session_ptr(newgrp, session_ptr); - (*new_group_ptr)->session_ptr = NULL; + newgrp->pmap.use_map = true; + newgrp->pmap.u.map = map; + + /* TODO: build hash to accelerate MPIR_Group_lpid_to_rank */ + *new_group_ptr = newgrp; + } + + fn_exit: return mpi_errno; + fn_fail: + goto fn_exit; } -/* - * return value is the first index in the list - * - * This "sorts" an lpid array by lpid value, using a simple merge sort - * algorithm. - * - * In actuality, it does not reorder the elements of maparray (these must remain - * in group rank order). Instead it builds the traversal order (in increasing - * lpid order) through the maparray given by the "next_lpid" fields. - */ -static int mergesort_lpidarray(MPII_Group_pmap_t maparray[], int n) +int MPIR_Group_create_stride(int size, int rank, MPIR_Session * session_ptr, + MPIR_Lpid offset, MPIR_Lpid stride, MPIR_Lpid blocksize, + MPIR_Group ** new_group_ptr) { - int idx1, idx2, first_idx, cur_idx, next_lpid, idx2_offset; + int mpi_errno = MPI_SUCCESS; - if (n == 2) { - if (maparray[0].lpid > maparray[1].lpid) { - first_idx = 1; - maparray[0].next_lpid = -1; - maparray[1].next_lpid = 0; - } else { - first_idx = 0; - maparray[0].next_lpid = 1; - maparray[1].next_lpid = -1; - } - return first_idx; + if (size == 0) { + /* See 5.3.2, Group Constructors. 
For many group routines, + * the standard explicitly says to return MPI_GROUP_EMPTY; + * for others it is implied */ + *new_group_ptr = MPIR_Group_empty; + goto fn_exit; + } else { + MPIR_Group *newgrp; + mpi_errno = MPIR_Group_create(size, &newgrp); + MPIR_ERR_CHECK(mpi_errno); + + newgrp->rank = rank; + MPIR_Group_set_session_ptr(newgrp, session_ptr); + + newgrp->pmap.use_map = false; + newgrp->pmap.u.stride.offset = offset; + newgrp->pmap.u.stride.stride = stride; + newgrp->pmap.u.stride.blocksize = blocksize; + + *new_group_ptr = newgrp; } - if (n == 1) { - maparray[0].next_lpid = -1; - return 0; + + fn_exit: + return mpi_errno; + fn_fail: + goto fn_exit; +} + +static MPIR_Lpid pmap_rank_to_lpid(struct MPIR_Pmap *pmap, int rank) +{ + if (rank < 0 || rank >= pmap->size) { + return MPI_UNDEFINED; } - if (n == 0) - return -1; - - /* Sort each half */ - idx2_offset = n / 2; - idx1 = mergesort_lpidarray(maparray, n / 2); - idx2 = mergesort_lpidarray(maparray + idx2_offset, n - n / 2) + idx2_offset; - /* merge the results */ - /* There are three lists: - * first_idx - points to the HEAD of the sorted, merged list - * cur_idx - points to the LAST element of the sorted, merged list - * idx1 - points to the HEAD of one sorted list - * idx2 - points to the HEAD of the other sorted list - * - * We first identify the head element of the sorted list. We then - * take elements from the remaining lists. When one list is empty, - * we add the other list to the end of sorted list. - * - * The last wrinkle is that the next_lpid fields in maparray[idx2] - * are relative to n/2, not 0 (that is, a next_lpid of 1 is - * really 1 + n/2, relative to the beginning of maparray). 
- */ - /* Find the head element */ - if (maparray[idx1].lpid > maparray[idx2].lpid) { - first_idx = idx2; - idx2 = maparray[idx2].next_lpid + idx2_offset; + + if (pmap->use_map) { + return pmap->u.map[rank]; } else { - first_idx = idx1; - idx1 = maparray[idx1].next_lpid; + MPIR_Lpid i_blk = rank / pmap->u.stride.blocksize; + MPIR_Lpid r_blk = rank % pmap->u.stride.blocksize; + return pmap->u.stride.offset + i_blk * pmap->u.stride.stride + r_blk; } +} - /* Merge the lists until one is empty */ - cur_idx = first_idx; - while (idx1 >= 0 && idx2 >= 0) { - if (maparray[idx1].lpid > maparray[idx2].lpid) { - next_lpid = maparray[idx2].next_lpid; - if (next_lpid >= 0) - next_lpid += idx2_offset; - maparray[cur_idx].next_lpid = idx2; - cur_idx = idx2; - idx2 = next_lpid; - } else { - next_lpid = maparray[idx1].next_lpid; - maparray[cur_idx].next_lpid = idx1; - cur_idx = idx1; - idx1 = next_lpid; +static int pmap_lpid_to_rank(struct MPIR_Pmap *pmap, MPIR_Lpid lpid) +{ + if (pmap->use_map) { + /* Use linear search for now. 
+ * Optimization: build hash map in MPIR_Group_create_map and do O(1) hash lookup + */ + for (int rank = 0; rank < pmap->size; rank++) { + if (pmap->u.map[rank] == lpid) { + return rank; + } } - } - /* Add whichever list remains */ - if (idx1 >= 0) { - maparray[cur_idx].next_lpid = idx1; + return MPI_UNDEFINED; } else { - maparray[cur_idx].next_lpid = idx2; - /* Convert the rest of these next_lpid values to be - * relative to the beginning of maparray */ - while (idx2 >= 0) { - next_lpid = maparray[idx2].next_lpid; - if (next_lpid >= 0) { - next_lpid += idx2_offset; - maparray[idx2].next_lpid = next_lpid; - } - idx2 = next_lpid; + lpid -= pmap->u.stride.offset; + MPIR_Lpid i_blk = lpid / pmap->u.stride.stride; + MPIR_Lpid r_blk = lpid % pmap->u.stride.stride; + + if (r_blk >= pmap->u.stride.blocksize) { + return MPI_UNDEFINED; } - } - return first_idx; + int rank = i_blk * pmap->u.stride.blocksize + r_blk; + if (rank >= 0 && rank < pmap->size) { + return rank; + } else { + return MPI_UNDEFINED; + } + } } -/* - * Create a list of the lpids, in lpid order. - * - * Called by group_compare, group_translate_ranks, group_union - * - * In the case of a single main thread lock, the lock must - * be held on entry to this routine. This forces some of the routines - * noted above to hold the SINGLE_CS; which would otherwise not be required. 
- */ -void MPII_Group_setup_lpid_list(MPIR_Group * group_ptr) +int MPIR_Group_lpid_to_rank(MPIR_Group * group, MPIR_Lpid lpid) { - if (group_ptr->idx_of_first_lpid == -1) { - group_ptr->idx_of_first_lpid = - mergesort_lpidarray(group_ptr->lrank_to_lpid, group_ptr->size); - } + return pmap_lpid_to_rank(&group->pmap, lpid); } -void MPIR_Group_setup_lpid_pairs(MPIR_Group * group_ptr1, MPIR_Group * group_ptr2) +MPIR_Lpid MPIR_Group_rank_to_lpid(MPIR_Group * group, int rank) { - /* If the lpid list hasn't been created, do it now */ - if (group_ptr1->idx_of_first_lpid < 0) { - MPII_Group_setup_lpid_list(group_ptr1); - } - if (group_ptr2->idx_of_first_lpid < 0) { - MPII_Group_setup_lpid_list(group_ptr2); - } + return pmap_rank_to_lpid(&group->pmap, rank); } #ifdef HAVE_ERROR_CHECKING @@ -355,54 +342,40 @@ int MPIR_Group_check_valid_ranges(MPIR_Group * group_ptr, int ranges[][3], int n int MPIR_Group_check_subset(MPIR_Group * group_ptr, MPIR_Comm * comm_ptr) { int mpi_errno = MPI_SUCCESS; - int g1_idx, g2_idx, l1_pid, l2_pid, i; - MPII_Group_pmap_t *vmap = 0; + int vsize = comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM ? 
comm_ptr->local_size : comm_ptr->remote_size; - MPIR_CHKLMEM_DECL(1); - - MPIR_Assert(group_ptr != NULL); - - MPIR_CHKLMEM_MALLOC(vmap, MPII_Group_pmap_t *, - vsize * sizeof(MPII_Group_pmap_t), mpi_errno, "", MPL_MEM_GROUP); /* Initialize the vmap */ - for (i = 0; i < vsize; i++) { - MPID_Comm_get_lpid(comm_ptr, i, &vmap[i].lpid, FALSE); - vmap[i].next_lpid = 0; + MPIR_Lpid *vmap = MPL_malloc(vsize * sizeof(MPIR_Lpid), MPL_MEM_GROUP); + for (int i = 0; i < vsize; i++) { + /* FIXME: MPID_Comm_get_lpid to be removed */ + uint64_t dev_lpid; + MPID_Comm_get_lpid(comm_ptr, i, &dev_lpid, FALSE); + MPIR_Assert((dev_lpid >> 32) == 0); + vmap[i] = dev_lpid; } - MPII_Group_setup_lpid_list(group_ptr); - g1_idx = group_ptr->idx_of_first_lpid; - g2_idx = mergesort_lpidarray(vmap, vsize); - MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST, - "initial indices: %d %d\n", g1_idx, g2_idx)); - while (g1_idx >= 0 && g2_idx >= 0) { - l1_pid = group_ptr->lrank_to_lpid[g1_idx].lpid; - l2_pid = vmap[g2_idx].lpid; - MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST, - "Lpids are %d, %d\n", l1_pid, l2_pid)); - if (l1_pid < l2_pid) { - /* If we have to advance g1, we didn't find a match, so - * that's an error. 
*/ - break; - } else if (l1_pid > l2_pid) { - g2_idx = vmap[g2_idx].next_lpid; - } else { - /* Equal */ - g1_idx = group_ptr->lrank_to_lpid[g1_idx].next_lpid; - g2_idx = vmap[g2_idx].next_lpid; + for (int rank = 0; rank < group_ptr->size; rank++) { + MPIR_Lpid lpid = MPIR_Group_rank_to_lpid(group_ptr, rank); + bool found = false; + for (int i = 0; i < vsize; i++) { + if (vmap[i] == lpid) { + found = true; + break; + } + } + if (!found) { + MPIR_ERR_SET1(mpi_errno, MPI_ERR_GROUP, "**groupnotincomm", + "**groupnotincomm %d", rank); + goto fn_fail; } - MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST, - "g1 = %d, g2 = %d\n", g1_idx, g2_idx)); - } - - if (g1_idx >= 0) { - MPIR_ERR_SET1(mpi_errno, MPI_ERR_GROUP, "**groupnotincomm", "**groupnotincomm %d", g1_idx); } - fn_fail: - MPIR_CHKLMEM_FREEALL(); + fn_exit: + MPL_free(vmap); return mpi_errno; + fn_fail: + goto fn_exit; } #endif /* HAVE_ERROR_CHECKING */ diff --git a/src/mpid/ch3/src/ch3u_comm.c b/src/mpid/ch3/src/ch3u_comm.c index b704d3042e2..ce2f495055b 100644 --- a/src/mpid/ch3/src/ch3u_comm.c +++ b/src/mpid/ch3/src/ch3u_comm.c @@ -512,7 +512,7 @@ static int nonempty_intersection(MPIR_Comm *comm, MPIR_Group *group, int *flag) for (i_g = 0; i_g < group->size; ++i_g) { /* FIXME: This won't work for dynamic procs */ - MPIDI_PG_Get_vc(MPIDI_Process.my_pg, group->lrank_to_lpid[i_g].lpid, &vc_g); + MPIDI_PG_Get_vc(MPIDI_Process.my_pg, MPIR_Group_rank_to_lpid(group, i_g), &vc_g); for (i_c = 0; i_c < comm->remote_size; ++i_c) { MPIDI_Comm_get_vc(comm, i_c, &vc_c); if (vc_g == vc_c) { diff --git a/src/mpid/ch3/src/ch3u_handle_connection.c b/src/mpid/ch3/src/ch3u_handle_connection.c index ef5819aaf3d..17ef122cb7f 100644 --- a/src/mpid/ch3/src/ch3u_handle_connection.c +++ b/src/mpid/ch3/src/ch3u_handle_connection.c @@ -372,7 +372,7 @@ static int terminate_failed_VCs(MPIR_Group *new_failed_group) MPIDI_VC_t *vc; /* terminate the VC */ /* FIXME: This won't work for dynamic procs */ - 
MPIDI_PG_Get_vc(MPIDI_Process.my_pg, new_failed_group->lrank_to_lpid[i].lpid, &vc); + MPIDI_PG_Get_vc(MPIDI_Process.my_pg, MPIR_Group_rank_to_lpid(new_failed_group, i), &vc); mpi_errno = MPIDI_CH3_Connection_terminate(vc); MPIR_ERR_CHECK(mpi_errno); } diff --git a/src/mpid/ch4/src/ch4_impl.h b/src/mpid/ch4/src/ch4_impl.h index 8991052f1a5..2f5a31dc767 100644 --- a/src/mpid/ch4/src/ch4_impl.h +++ b/src/mpid/ch4/src/ch4_impl.h @@ -387,7 +387,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDIU_valid_group_rank(MPIR_Comm * comm, int rank, MPIDI_NM_comm_get_gpid(comm, rank, &gpid, FALSE); - for (z = 0; z < size && gpid != grp->lrank_to_lpid[z].lpid; ++z) { + for (z = 0; z < size; ++z) { + if (gpid == MPIR_Group_rank_to_lpid(grp, z)) { + break; + } } ret = (z < size); diff --git a/test/mpi/group/Makefile.am b/test/mpi/group/Makefile.am index d647c9d377a..993dab99371 100644 --- a/test/mpi/group/Makefile.am +++ b/test/mpi/group/Makefile.am @@ -16,7 +16,3 @@ noinst_PROGRAMS = \ groupcreate \ gtranks \ groupnullincl - -# glpid is a whitebox test that uses mpiimpl.h; it is unlikely to build with the -# current build system setup -#EXTRA_PROGRAMS = glpid diff --git a/test/mpi/group/glpid.c b/test/mpi/group/glpid.c deleted file mode 100644 index 06238aeb942..00000000000 --- a/test/mpi/group/glpid.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include -#include "mpi.h" -#include "mpiimpl.h" - -int main(int argc, char *argv[]) -{ - MPIR_Group group, *group_ptr = &group; - int i; - - MPI_Init(&argc, &argv); - - /* Setup a sample group */ - group.handle = 1; - group.ref_count = 1; - group.size = 4; - group.rank = 0; - group.idx_of_first_lpid = -1; - group.lrank_to_lpid = (MPII_Group_pmap_t *) - MPL_malloc(group.size * sizeof(MPII_Group_pmap_t), MPL_MEM_OTHER); - for (i = 0; i < group.size; i++) { - group.lrank_to_lpid[i].lrank = i; - group.lrank_to_lpid[i].lpid = group.size - i - 1; - 
group.lrank_to_lpid[i].next_lpid = -1; - group.lrank_to_lpid[i].flag = 0; - } - - /* Set up the group lpid list */ - MPII_Group_setup_lpid_list(group_ptr); - - /* Print the group structure */ - printf("Index of first lpid = %d\n", group.idx_of_first_lpid); - for (i = 0; i < group.size; i++) { - printf("lrank_to_lpid[%d].next_lpid = %d, .lpid = %d\n", - i, group.lrank_to_lpid[i].next_lpid, group.lrank_to_lpid[i].lpid); - } - - MPI_Finalize(); - return 0; -}