Skip to content

Commit

Permalink
add LCI_IBV_GID_IDX to enable ibv GID
Browse files Browse the repository at this point in the history
  • Loading branch information
JiakunYan committed Aug 23, 2024
1 parent c19c76d commit 393057c
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 14 deletions.
6 changes: 6 additions & 0 deletions lci/api/lci.h
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,12 @@ extern int LCI_RECV_SLOW_DOWN_USEC;
*/
extern bool LCI_IBV_ENABLE_TD;

/**
* @ingroup LCI_COMM
* @brief GID index to use for the ibv backend. A negative value (the default is -1) means no GID is queried or used.
*/
extern int LCI_IBV_GID_IDX;

/**
* @ingroup LCI_COMM
* @brief Whether to enable the progress specific network endpoint.
Expand Down
6 changes: 2 additions & 4 deletions lci/backend/ibv/lcisi_ibv_detail.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,8 @@ static double translate_speed(uint8_t speed)
}
}

bool LCISI_ibv_select_best_device_port(struct ibv_device** dev_list,
int num_devices,
struct ibv_device** device_o,
uint8_t* port_o)
bool select_best_device_port(struct ibv_device** dev_list, int num_devices,
struct ibv_device** device_o, uint8_t* port_o)
{
struct ibv_device* best_device;
uint8_t best_port;
Expand Down
30 changes: 26 additions & 4 deletions lci/backend/ibv/lcisi_ibv_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,31 @@
#include "infiniband/verbs.h"
#include <stdbool.h>

bool LCISI_ibv_select_best_device_port(struct ibv_device** dev_list,
int num_devices,
struct ibv_device** device_o,
uint8_t* port_o);
/**
 * Select a device and port out of the given ibv device list.
 *
 * NOTE(review): the definition is not fully visible here; the description
 * below is inferred from the call site and should be confirmed.
 *
 * @param dev_list    Array of candidate devices.
 * @param num_devices Number of entries in dev_list.
 * @param device_o    Output: presumably receives the selected device.
 * @param port_o      Output: presumably receives the selected port number.
 * @return The caller asserts that the result is true and otherwise reports
 *         "Cannot find available ibv device/port!", so false presumably
 *         means no usable device/port was found.
 */
bool select_best_device_port(struct ibv_device** dev_list, int num_devices,
struct ibv_device** device_o, uint8_t* port_o);

/**
 * Encode a GID as a 32-character hexadecimal string ("wire GID").
 *
 * The first 16 bytes of the GID are viewed as four 32-bit words; each word
 * is passed through htobe32() and printed as eight lowercase hex digits.
 * wire_gid_to_gid() performs the reverse conversion.
 *
 * The function is static inline because it is defined in a header: with
 * external linkage every translation unit including this header would emit
 * its own definition and the link would fail with duplicate symbols.
 *
 * @param gid  GID to encode; 16 bytes are read from it.
 * @param wgid Output buffer. Must hold at least 33 bytes: 32 hex digits
 *             plus the terminating NUL.
 */
static inline void gid_to_wire_gid(const union ibv_gid* gid, char wgid[])
{
  uint32_t tmp_gid[4];

  /* Copy out through memcpy to avoid aliasing the union via uint32_t*. */
  memcpy(tmp_gid, gid, sizeof(tmp_gid));
  for (int i = 0; i < 4; ++i) {
    /* Each chunk is 8 digits + NUL; the NUL of chunk i is overwritten by the
     * first digit of chunk i + 1, so only the last NUL survives. */
    snprintf(&wgid[i * 8], 9, "%08x", htobe32(tmp_gid[i]));
  }
}

/**
 * Decode a 32-character hexadecimal "wire GID" back into a GID.
 *
 * This is the inverse of gid_to_wire_gid(): the input is consumed as four
 * chunks of eight hex digits, each chunk is parsed as a 32-bit value and
 * passed through be32toh(), and the resulting 16 bytes are copied into *gid.
 *
 * A chunk that does not start with a hex digit decodes as zero instead of
 * leaving the parsed value indeterminate (the function has no way to report
 * an error to its caller).
 *
 * The function is static inline because it is defined in a header: with
 * external linkage every translation unit including this header would emit
 * its own definition and the link would fail with duplicate symbols.
 *
 * @param wgid Input string; at least 32 characters are read from it.
 * @param gid  Output GID; 16 bytes are written to it.
 */
static inline void wire_gid_to_gid(const char* wgid, union ibv_gid* gid)
{
  char tmp[9];
  uint32_t tmp_gid[4];

  tmp[8] = '\0';
  for (int i = 0; i < 4; ++i) {
    /* %x requires an unsigned int target. */
    unsigned int word = 0;

    memcpy(tmp, wgid + i * 8, 8);
    if (sscanf(tmp, "%x", &word) != 1) {
      word = 0; /* malformed chunk: fall back to a defined value */
    }
    tmp_gid[i] = be32toh((uint32_t)word);
  }
  /* A GID is 16 bytes, i.e. exactly the four words decoded above. */
  memcpy(gid, tmp_gid, sizeof(tmp_gid));
}

#endif
38 changes: 33 additions & 5 deletions lci/backend/ibv/server_ibv.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "runtime/lcii.h"
#include "backend/ibv/lcisi_ibv_detail.h"

static const int max_sge_num = 1;
static const int inline_size = 236;
Expand Down Expand Up @@ -85,8 +86,8 @@ void LCISD_server_init(LCIS_server_t* s)
exit(EXIT_FAILURE);
}

bool ret = LCISI_ibv_select_best_device_port(
server->dev_list, num_devices, &server->ib_dev, &server->dev_port);
bool ret = select_best_device_port(server->dev_list, num_devices,
&server->ib_dev, &server->dev_port);
LCI_Assert(ret, "Cannot find available ibv device/port!\n");

// ibv_open_device provides the user with a verbs context which is the object
Expand Down Expand Up @@ -167,6 +168,20 @@ void LCISD_server_init(LCIS_server_t* s)
LCI_Log(LCI_LOG_INFO, "ibv", "Maximum MTU: %s; Active MTU: %s\n",
mtu_str(server->port_attr.max_mtu),
mtu_str(server->port_attr.active_mtu));

// query the gid
server->gid_idx = LCI_IBV_GID_IDX;
if (server->gid_idx >= 0) {
LCI_Log(LCI_LOG_INFO, "ibv", "Use GID index: %d\n", server->gid_idx);
if (ibv_query_gid(server->dev_ctx, server->dev_port, server->gid_idx,
&server->gid)) {
fprintf(stderr, "can't read sgid of index %d\n", server->gid_idx);
exit(EXIT_FAILURE);
}
} else
memset(&server->gid, 0, sizeof(server->gid));

// Initialize the event polling thread
LCISI_event_polling_thread_init(server);
}

Expand Down Expand Up @@ -337,12 +352,15 @@ void LCISD_endpoint_init(LCIS_server_t server_pp, LCIS_endpoint_t* endpoint_pp,
exit(EXIT_FAILURE);
}
}
char wgid[33];
memset(wgid, 0, sizeof(wgid));
gid_to_wire_gid(&endpoint_p->server->gid, wgid);
// Use this queue pair "i" to connect to rank e.
char key[LCT_PMI_STRING_LIMIT + 1];
sprintf(key, "LCI_KEY_%d_%d_%d", endpoint_id, LCI_RANK, i);
char value[LCT_PMI_STRING_LIMIT + 1];
sprintf(value, "%x:%hx", endpoint_p->qps[i]->qp_num,
endpoint_p->server->port_attr.lid);
sprintf(value, "%x:%hx:%s", endpoint_p->qps[i]->qp_num,
endpoint_p->server->port_attr.lid, wgid);
LCT_pmi_publish(key, value);
}
LCI_Log(LCI_LOG_INFO, "ibv", "Current inline data size is %d\n", inline_size);
Expand All @@ -356,7 +374,10 @@ void LCISD_endpoint_init(LCIS_server_t server_pp, LCIS_endpoint_t* endpoint_pp,
LCT_pmi_getname(i, key, value);
uint32_t dest_qpn;
uint16_t dest_lid;
sscanf(value, "%x:%hx", &dest_qpn, &dest_lid);
union ibv_gid gid;
char wgid[33];
sscanf(value, "%x:%hx:%s", &dest_qpn, &dest_lid, wgid);
wire_gid_to_gid(wgid, &gid);
// Once a queue pair (QP) has receive buffers posted to it, it is now
// possible to transition the QP into the ready to receive (RTR) state.
{
Expand All @@ -383,6 +404,13 @@ void LCISD_endpoint_init(LCIS_server_t server_pp, LCIS_endpoint_t* endpoint_pp,
attr.min_rnr_timer = 12;
// should not be necessary to set these, given is_global = 0
memset(&attr.ah_attr.grh, 0, sizeof attr.ah_attr.grh);
// If we are using gid
if (gid.global.interface_id) {
attr.ah_attr.is_global = 1;
attr.ah_attr.grh.hop_limit = 1;
attr.ah_attr.grh.dgid = gid;
attr.ah_attr.grh.sgid_index = endpoint_p->server->gid_idx;
}

int flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC |
Expand Down
2 changes: 2 additions & 0 deletions lci/backend/ibv/server_ibv.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ typedef struct __attribute__((aligned(LCI_CACHE_LINE))) LCISI_server_t {
uint8_t dev_port;
struct ibv_mr* odp_mr;
size_t max_inline;
int gid_idx;
union ibv_gid gid;
// event polling thread
pthread_t event_polling_thread;
atomic_bool event_polling_thread_run;
Expand Down
1 change: 0 additions & 1 deletion lci/backend/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ static inline LCI_error_t LCISD_post_recv(LCIS_endpoint_t endpoint_pp,
#endif
#ifdef LCI_USE_SERVER_IBV
#include "backend/ibv/server_ibv.h"
#include "backend/ibv/lcisi_ibv_detail.h"
#endif
#ifdef LCI_USE_SERVER_UCX
#include "backend/ucx/server_ucx.h"
Expand Down
2 changes: 2 additions & 0 deletions lci/runtime/env.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ LCI_API bool LCI_IBV_ENABLE_EVENT_POLLING_THREAD;
LCI_API int LCI_SEND_SLOW_DOWN_USEC;
LCI_API int LCI_RECV_SLOW_DOWN_USEC;
LCI_API bool LCI_IBV_ENABLE_TD;
LCI_API int LCI_IBV_GID_IDX;
LCI_API bool LCI_ENABLE_PRG_NET_ENDPOINT;
LCI_API LCI_rdv_protocol_t LCI_RDV_PROTOCOL;
LCI_API bool LCI_OFI_CXI_TRY_NO_HACK;
Expand Down Expand Up @@ -85,6 +86,7 @@ void LCII_env_init(int num_proc, int rank)
#endif
LCI_IBV_ENABLE_TD =
LCIU_getenv_or("LCI_IBV_ENABLE_TD", LCI_IBV_ENABLE_TD_DEFAULT);
LCI_IBV_GID_IDX = LCIU_getenv_or("LCI_IBV_GID_IDX", -1);
LCI_ENABLE_PRG_NET_ENDPOINT = LCIU_getenv_or(
"LCI_ENABLE_PRG_NET_ENDPOINT", LCI_ENABLE_PRG_NET_ENDPOINT_DEFAULT);
LCI_MEDIUM_SIZE = LCI_PACKET_SIZE - sizeof(struct LCII_packet_context);
Expand Down
4 changes: 4 additions & 0 deletions lct/pmi/pmi_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,15 @@ int LCT_pmi_get_rank() { return lcti_pmi_ops.get_rank(); }
// Forwards to the get_size operation of lcti_pmi_ops and returns its result.
int LCT_pmi_get_size() { return lcti_pmi_ops.get_size(); }
// Publish a key/value pair through lcti_pmi_ops.
// A debug-level log line ("publish <key> <value>") is emitted first, then both
// strings are handed unchanged to lcti_pmi_ops.publish.
void LCT_pmi_publish(char* key, char* value)
{
// Log before publishing so the outgoing pair is recorded.
LCT_Log(LCT_log_ctx_default, LCT_LOG_DEBUG, "pmi", "publish %s %s\n", key,
value);
lcti_pmi_ops.publish(key, value);
}
// Look up a key for the given rank through lcti_pmi_ops.
// Calls lcti_pmi_ops.getname(rank, key, value) and afterwards emits a
// debug-level log line ("getname <rank> <key> <value>").
void LCT_pmi_getname(int rank, char* key, char* value)
{
lcti_pmi_ops.getname(rank, key, value);
// Logged after the call; presumably getname fills in `value`, so this shows
// the retrieved string.
LCT_Log(LCT_log_ctx_default, LCT_LOG_DEBUG, "pmi", "getname %d %s %s\n", rank,
key, value);
}
void LCT_pmi_barrier()
{
Expand Down

0 comments on commit 393057c

Please sign in to comment.