From 461b6ed1f85cc036e608aec1df94c5b971b2ee53 Mon Sep 17 00:00:00 2001 From: Omri Mor Date: Thu, 4 Jun 2020 19:52:34 -0500 Subject: [PATCH 1/7] pool: don't try to steal from own thread steal id = (rand (mod n-1)) + id + 1 (mod n) Current implementation has a lot of repeated calls to lc_pool_get_local that the compiler doesn't seem to fully get rid of. Maybe marking it with __attribute__((pure)) will help? Need to re-read docs to ensure we don't violate its constraints. --- include/lc/pool.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/lc/pool.h b/include/lc/pool.h index ae9d9040..6e4560ea 100644 --- a/include/lc/pool.h +++ b/include/lc/pool.h @@ -90,14 +90,21 @@ LC_INLINE int32_t lc_pool_get_local(struct lc_pool* pool) return pid; } -LC_INLINE void* lc_pool_get_slow(struct lc_pool* pool) { - void* elm = NULL; - while (!elm) { - int steal = rand() % (pool->npools); +LC_INLINE int32_t lc_pool_get_steal(struct lc_pool* pool) +{ + int32_t pid = lc_pool_get_local(pool); + int32_t npools = pool->npools; + int32_t r = rand() % (npools - 1); + return (r + pid + 1) % npools; +} + +LC_INLINE void* lc_pool_steal(struct lc_pool* pool) +{ + void* elm = NULL; + int32_t steal = lc_pool_get_steal(pool); if (likely(pool->lpools[steal] != NULL)) elm = dq_pop_bot(pool->lpools[steal]); - } - return elm; + return elm; } LC_INLINE void lc_pool_put(struct lc_pool* pool, void* elm) { @@ -111,26 +118,19 @@ LC_INLINE void lc_pool_put_to(struct lc_pool* pool, void* elm, int32_t pid) { dq_push_top(lpool, elm); } -LC_INLINE void* lc_pool_get(struct lc_pool* pool) { +LC_INLINE void* lc_pool_get_nb(struct lc_pool* pool) { int32_t pid = lc_pool_get_local(pool); struct dequeue* lpool = pool->lpools[pid]; - void *elm = NULL; - elm = dq_pop_top(lpool); + void* elm = dq_pop_top(lpool); if (elm == NULL) - elm = lc_pool_get_slow(pool); + elm = lc_pool_steal(pool); return elm; } -LC_INLINE void* lc_pool_get_nb(struct lc_pool* pool) { - int32_t pid = lc_pool_get_local(pool); - struct dequeue* lpool = pool->lpools[pid]; +LC_INLINE void* lc_pool_get(struct lc_pool* pool) { void* elm = NULL; - elm = dq_pop_top(lpool); - if (elm == NULL) { - int steal = rand() % (pool->npools); - if (likely(pool->lpools[steal] != NULL)) - elm = dq_pop_bot(pool->lpools[steal]); - } + while (elm == NULL) + elm = lc_pool_get_nb(pool); return elm; } From 5eb5330431cd95746be5ddc3021e17c7b5bf8b5a Mon Sep 17 00:00:00 2001 From: Omri Mor Date: Thu, 4 Jun 2020 19:58:47 -0500 Subject: [PATCH 2/7] pkpool: return packets to allocating pool Populate p->context.poolid when allocating a packet and use lc_pool_put_to when returning it so that packets don't get stuck in the same pool. This reduces the number of retries needed, but may have other effects - needs testing. --- src/cq.c | 2 +- src/include/proto.h | 8 ++++---- src/include/server/server_psm2.h | 6 +++++- src/long.c | 6 +++--- src/medium.c | 9 +++------ 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/cq.c b/src/cq.c index 50a8c114..ec78e60b 100644 --- a/src/cq.c +++ b/src/cq.c @@ -14,6 +14,6 @@ lc_status lc_cq_pop(lc_ep ep, lc_req** req_ptr) lc_status lc_cq_reqfree(lc_ep ep, lc_req* req) { lc_packet* packet = (lc_packet*) req->parent; - lc_pool_put(ep->pkpool, packet); + lci_pk_free_data(ep, packet); return LC_OK; } diff --git a/src/include/proto.h b/src/include/proto.h index fe9d41a4..21df1445 100644 --- a/src/include/proto.h +++ b/src/include/proto.h @@ -109,7 +109,7 @@ static inline void lci_ce_queue(lc_ep ep, lc_packet* p) static inline void lci_handle_rtr(struct lci_ep* ep, lc_packet* p) { dprintf("Recv RTR %p\n", p); - lci_pk_init(ep, -1, LC_PROTO_LONG, p); + lci_pk_init(ep, p->context.poolid, LC_PROTO_LONG, p); // dprintf("%d] rma %p --> %p %.4x via %d\n", lcg_rank, p->data.rts.src_addr, p->data.rtr.tgt_addr, crc32c((char*) p->data.rts.src_addr, p->data.rts.size), p->data.rtr.rkey); lc_server_rma_rtr(ep->server, p->context.req->rhandle, @@ -121,7 +121,7 @@ static inline void lci_handle_rtr(struct lci_ep* ep, lc_packet* p) static inline void lci_handle_rts(struct lci_ep* ep, lc_packet* p) { dprintf("Recv RTS: %p\n", p); - lci_pk_init(ep, -1, LC_PROTO_RTR, p); + lci_pk_init(ep, p->context.poolid, LC_PROTO_RTR, p); lc_proto proto = MAKE_PROTO(ep->gid, LC_PROTO_RTR, 0); lci_prepare_rtr(ep, p->context.req->buffer, p->data.rts.size, p); lc_server_sendm(ep->server, p->context.req->rhandle, @@ -264,10 +264,10 @@ static inline void lci_serve_send(lc_packet* p) } else if (proto == LC_PROTO_LONG) { dprintf("SENT LONG: %p\n", p); p->data.rts.cb((void*) p->data.rts.ce); - lci_pk_free(ep, p); + lci_pk_free_data(ep, p); } else if (proto == LC_PROTO_RTS) { dprintf("SENT RTS: %p\n", p); - lci_pk_free(ep, p); + lci_pk_free_data(ep, p); } else { dprintf("SENT UNKNOWN: %p\n", p); lci_pk_free_data(ep, p); diff --git a/src/include/server/server_psm2.h b/src/include/server/server_psm2.h index ca47927c..1d9c512d 100644 --- a/src/include/server/server_psm2.h +++ b/src/include/server/server_psm2.h @@ -292,7 +292,10 @@ static inline int lc_server_progress(lc_server* s) lci_serve_recv_rdma(p, status.msg_tag.tag1); } else { p->context.req = &p->context.req_s; - lc_pool_put(s->pkpool, p); + if (p->context.poolid != -1) + lc_pool_put_to(s->pkpool, p, p->context.poolid); + else + lc_pool_put(s->pkpool, p); } } else if (ctx & PSM_SEND) { lc_packet* p = (lc_packet*) (ctx ^ PSM_SEND); @@ -318,6 +321,7 @@ static inline void lc_server_post_recv(lc_server* s, lc_packet* p) } psm2_mq_tag_t rtag = PSM_TAG_TRECV_DATA(); + p->context.poolid = lc_pool_get_local(s->pkpool); PSM_SAFECALL(psm2_mq_irecv2( s->mq, PSM2_MQ_ANY_ADDR, &rtag, /* message tag */ diff --git a/src/long.c b/src/long.c index 5be2670c..0e02009e 100644 --- a/src/long.c +++ b/src/long.c @@ -7,7 +7,7 @@ lc_status lc_sendl(void* src, size_t size, int rank, int tag, lc_ep ep, lc_send_cb cb, void* ce) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, -1, LC_PROTO_RTS, p); + lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_RTS, p); struct lci_rep* rep = &(ep->rep[rank]); lci_prepare_rts(src, size, cb, ce, p); lc_server_sendm(ep->server, rep->handle, @@ -20,7 +20,7 @@ lc_status lc_putl(void* src, size_t size, int rank, uintptr_t addr, lc_ep ep, lc_send_cb cb, void* ce) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, -1, LC_PROTO_LONG, p); + lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_LONG, p); p->data.rts.cb = cb; p->data.rts.ce = (uintptr_t) ce; @@ -34,7 +34,7 @@ lc_status lc_putls(void* src, size_t size, int rank, uintptr_t addr, int meta, lc_ep ep, lc_send_cb cb, void* ce) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, -1, LC_PROTO_LONG, p); + lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_LONG, p); p->data.rts.cb = cb; p->data.rts.ce = (uintptr_t) ce; struct lci_rep* rep = &(ep->rep[rank]); diff --git a/src/medium.c b/src/medium.c index e758575c..3a0f6753 100644 --- a/src/medium.c +++ b/src/medium.c @@ -6,8 +6,7 @@ lc_status lc_sendm(void* src, size_t size, int rank, int tag, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, (size > 1024) ? lc_pool_get_local(ep->pkpool) : -1, - LC_PROTO_DATA, p); + lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(p->data.buffer, src, size); lc_server_sendm(ep->server, rep->handle, size, p, @@ -18,8 +17,7 @@ lc_status lc_sendm(void* src, size_t size, int rank, int tag, lc_ep ep) lc_status lc_putm(void* src, size_t size, int rank, uintptr_t addr, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, (size > 1024) ? lc_pool_get_local(ep->pkpool) : -1, - LC_PROTO_DATA, p); + lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(&p->data, src, size); lc_server_putm(ep->server, rep->handle, rep->base, (uint32_t) (addr - rep->base), @@ -30,8 +28,7 @@ lc_status lc_putm(void* src, size_t size, int rank, uintptr_t addr, lc_ep ep) lc_status lc_putms(void* src, size_t size, int rank, uintptr_t addr, int meta, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, (size > 1024) ? lc_pool_get_local(ep->pkpool) : -1, - LC_PROTO_DATA, p); + lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(&p->data, src, size); lc_server_putms(ep->server, rep->handle, rep->base, (uint32_t) (addr - rep->base), From 22b35ff9b8e4a8959c63a04aec696335b81574e8 Mon Sep 17 00:00:00 2001 From: Omri Mor Date: Fri, 5 Jun 2020 17:39:40 -0500 Subject: [PATCH 3/7] pkpool: config parameters for send packet return Vu noted that this should be tunable at compile time, as different applications/systems may want different behavior. Adds two compile-time parameter definitions in config.h: LC_PKT_RET_MED_SIZE: min size of med send to return to sender pool LC_PKT_RET_LONG: whether to return long send packet to sender pool --- src/include/config.h | 3 +++ src/long.c | 13 ++++++++++--- src/medium.c | 10 +++++++--- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/include/config.h b/src/include/config.h index 7a30e213..afe86345 100644 --- a/src/include/config.h +++ b/src/include/config.h @@ -20,4 +20,7 @@ #define LC_SERVER_NUM_PKTS 1024 #define LC_CACHE_LINE 64 +#define LC_PKT_RET_MED_SIZE 1024 +// #define LC_PKT_RET_LONG + #endif diff --git a/src/long.c b/src/long.c index 0e02009e..5e5a1c02 100644 --- a/src/long.c +++ b/src/long.c @@ -1,13 +1,20 @@ #include "lc.h" +#include "config.h" #include "lc_priv.h" #include "lc/pool.h" +#ifdef LC_PKT_RET_LONG +#define LC_LONG_POOL_ID(ep) (lc_pool_get_local(ep->pkpool)) +#else +#define LC_LONG_POOL_ID(ep) (-1) +#endif + lc_status lc_sendl(void* src, size_t size, int rank, int tag, lc_ep ep, lc_send_cb cb, void* ce) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_RTS, p); + lci_pk_init(ep, LC_LONG_POOL_ID(ep), LC_PROTO_RTS, p); struct lci_rep* rep = &(ep->rep[rank]); lci_prepare_rts(src, size, cb, ce, p); lc_server_sendm(ep->server, rep->handle, @@ -20,7 +27,7 @@ lc_status lc_putl(void* src, size_t size, int rank, uintptr_t addr, lc_ep ep, lc_send_cb cb, void* ce) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_LONG, p); + lci_pk_init(ep, LC_LONG_POOL_ID(ep), LC_PROTO_LONG, p); p->data.rts.cb = cb; p->data.rts.ce = (uintptr_t) ce; @@ -34,7 +41,7 @@ lc_status lc_putls(void* src, size_t size, int rank, uintptr_t addr, int meta, lc_ep ep, lc_send_cb cb, void* ce) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_LONG, p); + lci_pk_init(ep, LC_LONG_POOL_ID(ep), LC_PROTO_LONG, p); p->data.rts.cb = cb; p->data.rts.ce = (uintptr_t) ce; struct lci_rep* rep = &(ep->rep[rank]); diff --git a/src/medium.c b/src/medium.c index 3a0f6753..8252ccce 100644 --- a/src/medium.c +++ b/src/medium.c @@ -1,4 +1,5 @@ #include "lc.h" +#include "config.h" #include "lc_priv.h" #include "lc/pool.h" @@ -6,7 +7,8 @@ lc_status lc_sendm(void* src, size_t size, int rank, int tag, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_DATA, p); + lci_pk_init(ep, (size > LC_PKT_RET_MED_SIZE) ? lc_pool_get_local(ep->pkpool) : -1, + LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(p->data.buffer, src, size); lc_server_sendm(ep->server, rep->handle, size, p, @@ -17,7 +19,8 @@ lc_status lc_sendm(void* src, size_t size, int rank, int tag, lc_ep ep) lc_status lc_putm(void* src, size_t size, int rank, uintptr_t addr, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_DATA, p); + lci_pk_init(ep, (size > LC_PKT_RET_MED_SIZE) ? lc_pool_get_local(ep->pkpool) : -1, + LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(&p->data, src, size); lc_server_putm(ep->server, rep->handle, rep->base, (uint32_t) (addr - rep->base), @@ -28,7 +31,8 @@ lc_status lc_putm(void* src, size_t size, int rank, uintptr_t addr, lc_ep ep) lc_status lc_putms(void* src, size_t size, int rank, uintptr_t addr, int meta, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, lc_pool_get_local(ep->pkpool), LC_PROTO_DATA, p); + lci_pk_init(ep, (size > LC_PKT_RET_MED_SIZE) ? lc_pool_get_local(ep->pkpool) : -1, + LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(&p->data, src, size); lc_server_putms(ep->server, rep->handle, rep->base, (uint32_t) (addr - rep->base), From 9809f958fa7b7204e07e9b798dbdaab37fe75175 Mon Sep 17 00:00:00 2001 From: Omri Mor Date: Mon, 8 Jun 2020 19:05:45 -0500 Subject: [PATCH 4/7] pool: prevent div by 0 error with npools == 1 If pool->npools == 1, there is no valid steal target: return self pid. Also includes formatting fixes and changes discussed in #20. --- include/lc/pool.h | 49 +++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/include/lc/pool.h b/include/lc/pool.h index 6e4560ea..27767be2 100644 --- a/include/lc/pool.h +++ b/include/lc/pool.h @@ -26,7 +26,7 @@ struct dequeue; extern "C" { #endif -LC_INLINE int lc_worker_id() +LC_INLINE int lc_worker_id(void) { if (unlikely(lcg_core_id == -1)) { lcg_core_id = sched_getcpu(); @@ -90,47 +90,58 @@ LC_INLINE int32_t lc_pool_get_local(struct lc_pool* pool) return pid; } -LC_INLINE int32_t lc_pool_get_steal(struct lc_pool* pool) +LC_INLINE int32_t lc_pool_get_steal_id(struct lc_pool* pool, int32_t pid) { - int32_t pid = lc_pool_get_local(pool); - int32_t npools = pool->npools; - int32_t r = rand() % (npools - 1); - return (r + pid + 1) % npools; + int32_t npools = pool->npools; + if (npools == 1) + return pid; /* if only one pool, no one else to steal from */ + int32_t r = rand() % (npools - 1); + return (r + pid + 1) % npools; } -LC_INLINE void* lc_pool_steal(struct lc_pool* pool) +LC_INLINE void* lc_pool_steal(struct lc_pool* pool, int32_t pid) { - void* elm = NULL; - int32_t steal = lc_pool_get_steal(pool); - if (likely(pool->lpools[steal] != NULL)) - elm = dq_pop_bot(pool->lpools[steal]); - return elm; + void* elm = NULL; + int32_t target = lc_pool_get_steal_id(pool, pid); + if (target != pid && likely(pool->lpools[target] != NULL)) + elm = dq_pop_bot(pool->lpools[target]); + return elm; } -LC_INLINE void lc_pool_put(struct lc_pool* pool, void* elm) { +LC_INLINE void lc_pool_put(struct lc_pool* pool, void* elm) +{ int32_t pid = lc_pool_get_local(pool); struct dequeue* lpool = pool->lpools[pid]; dq_push_top(lpool, elm); } -LC_INLINE void lc_pool_put_to(struct lc_pool* pool, void* elm, int32_t pid) { +LC_INLINE void lc_pool_put_to(struct lc_pool* pool, void* elm, int32_t pid) +{ struct dequeue* lpool = pool->lpools[pid]; dq_push_top(lpool, elm); } -LC_INLINE void* lc_pool_get_nb(struct lc_pool* pool) { +LC_INLINE void* lc_pool_get_nb(struct lc_pool* pool) +{ int32_t pid = lc_pool_get_local(pool); struct dequeue* lpool = pool->lpools[pid]; void* elm = dq_pop_top(lpool); if (elm == NULL) - elm = lc_pool_steal(pool); + elm = lc_pool_steal(pool, pid); return elm; } -LC_INLINE void* lc_pool_get(struct lc_pool* pool) { +LC_INLINE void* lc_pool_get(struct lc_pool* pool) +{ + int32_t pid = lc_pool_get_local(pool); + struct dequeue* lpool = pool->lpools[pid]; void* elm = NULL; - while (elm == NULL) - elm = lc_pool_get_nb(pool); + while (elm == NULL) { + /* must try self every iteration since we never steal from self */ + elm = dq_pop_top(lpool); + if (elm == NULL) + elm = lc_pool_steal(pool, pid); + } return elm; } From 081bab4da4c9691ab5be96f3834764fbb19b87a0 Mon Sep 17 00:00:00 2001 From: Omri Mor Date: Wed, 10 Jun 2020 16:54:14 -0500 Subject: [PATCH 5/7] pool: lc_pool_get_local -> lc_pool_get_local_id Rename lc_pool_get_local to better reflect what it actually does. --- include/lc/pool.h | 8 ++++---- src/include/server/server_ibv_helper.h | 2 +- src/include/server/server_psm2.h | 2 +- src/long.c | 2 +- src/medium.c | 11 +++++------ 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/lc/pool.h b/include/lc/pool.h index 27767be2..45886cf0 100644 --- a/include/lc/pool.h +++ b/include/lc/pool.h @@ -69,7 +69,7 @@ void* lc_pool_get_nb(lc_pool* pool); #define POOL_UNINIT ((int32_t)-1) -LC_INLINE int32_t lc_pool_get_local(struct lc_pool* pool) +LC_INLINE int32_t lc_pool_get_local_id(struct lc_pool* pool) { int wid = lc_worker_id(); int32_t pid = tls_pool_struct[wid][pool->key]; @@ -110,7 +110,7 @@ LC_INLINE void* lc_pool_steal(struct lc_pool* pool, int32_t pid) LC_INLINE void lc_pool_put(struct lc_pool* pool, void* elm) { - int32_t pid = lc_pool_get_local(pool); + int32_t pid = lc_pool_get_local_id(pool); struct dequeue* lpool = pool->lpools[pid]; dq_push_top(lpool, elm); } @@ -123,7 +123,7 @@ LC_INLINE void lc_pool_put_to(struct lc_pool* pool, void* elm, int32_t pid) LC_INLINE void* lc_pool_get_nb(struct lc_pool* pool) { - int32_t pid = lc_pool_get_local(pool); + int32_t pid = lc_pool_get_local_id(pool); struct dequeue* lpool = pool->lpools[pid]; void* elm = dq_pop_top(lpool); if (elm == NULL) @@ -133,7 +133,7 @@ LC_INLINE void* lc_pool_get_nb(struct lc_pool* pool) LC_INLINE void* lc_pool_get(struct lc_pool* pool) { - int32_t pid = lc_pool_get_local(pool); + int32_t pid = lc_pool_get_local_id(pool); struct dequeue* lpool = pool->lpools[pid]; void* elm = NULL; while (elm == NULL) { diff --git a/src/include/server/server_ibv_helper.h b/src/include/server/server_ibv_helper.h index ce0cb083..829e7cbb 100644 --- a/src/include/server/server_ibv_helper.h +++ b/src/include/server/server_ibv_helper.h @@ -177,7 +177,7 @@ static inline void ibv_post_recv_(lc_server* s, lc_packet* p) .num_sge = 1, }; - p->context.poolid = lc_pool_get_local(s->pkpool); + p->context.poolid = lc_pool_get_local_id(s->pkpool); struct ibv_recv_wr* bad_wr = 0; IBV_SAFECALL(ibv_post_srq_recv(s->dev_srq, &wr, &bad_wr)); diff --git a/src/include/server/server_psm2.h b/src/include/server/server_psm2.h index 1d9c512d..2a2835b9 100644 --- a/src/include/server/server_psm2.h +++ b/src/include/server/server_psm2.h @@ -321,7 +321,7 @@ static inline void lc_server_post_recv(lc_server* s, lc_packet* p) } psm2_mq_tag_t rtag = PSM_TAG_TRECV_DATA(); - p->context.poolid = lc_pool_get_local(s->pkpool); + p->context.poolid = lc_pool_get_local_id(s->pkpool); PSM_SAFECALL(psm2_mq_irecv2( s->mq, PSM2_MQ_ANY_ADDR, &rtag, /* message tag */ diff --git a/src/long.c b/src/long.c index 5e5a1c02..4265dd47 100644 --- a/src/long.c +++ b/src/long.c @@ -5,7 +5,7 @@ #include "lc/pool.h" #ifdef LC_PKT_RET_LONG -#define LC_LONG_POOL_ID(ep) (lc_pool_get_local(ep->pkpool)) +#define LC_LONG_POOL_ID(ep) (lc_pool_get_local_id(ep->pkpool)) #else #define LC_LONG_POOL_ID(ep) (-1) #endif diff --git a/src/medium.c b/src/medium.c index 8252ccce..6682960d 100644 --- a/src/medium.c +++ b/src/medium.c @@ -4,11 +4,12 @@ #include "lc_priv.h" #include "lc/pool.h" +#define LC_MED_POOL_ID(ep, size) ((size > LC_PKT_RET_MED_SIZE) ? lc_pool_get_local_id(ep->pkpool) : -1) + lc_status lc_sendm(void* src, size_t size, int rank, int tag, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, (size > LC_PKT_RET_MED_SIZE) ? lc_pool_get_local(ep->pkpool) : -1, - LC_PROTO_DATA, p); + lci_pk_init(ep, LC_MED_POOL_ID(ep, size), LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(p->data.buffer, src, size); lc_server_sendm(ep->server, rep->handle, size, p, @@ -19,8 +20,7 @@ lc_status lc_sendm(void* src, size_t size, int rank, int tag, lc_ep ep) lc_status lc_putm(void* src, size_t size, int rank, uintptr_t addr, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, (size > LC_PKT_RET_MED_SIZE) ? lc_pool_get_local(ep->pkpool) : -1, - LC_PROTO_DATA, p); + lci_pk_init(ep, LC_MED_POOL_ID(ep, size), LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(&p->data, src, size); lc_server_putm(ep->server, rep->handle, rep->base, (uint32_t) (addr - rep->base), @@ -31,8 +31,7 @@ lc_status lc_putm(void* src, size_t size, int rank, uintptr_t addr, lc_ep ep) lc_status lc_putms(void* src, size_t size, int rank, uintptr_t addr, int meta, lc_ep ep) { LC_POOL_GET_OR_RETN(ep->pkpool, p); - lci_pk_init(ep, (size > LC_PKT_RET_MED_SIZE) ? lc_pool_get_local(ep->pkpool) : -1, - LC_PROTO_DATA, p); + lci_pk_init(ep, LC_MED_POOL_ID(ep, size), LC_PROTO_DATA, p); struct lci_rep* rep = &(ep->rep[rank]); memcpy(&p->data, src, size); lc_server_putms(ep->server, rep->handle, rep->base, (uint32_t) (addr - rep->base), From 7ee652c980ee9132b585b24383176c6b262980be Mon Sep 17 00:00:00 2001 From: Omri Mor Date: Wed, 10 Jun 2020 16:56:17 -0500 Subject: [PATCH 6/7] pool: change steal API for better design --- include/lc/pool.h | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/include/lc/pool.h b/include/lc/pool.h index 45886cf0..04eccda7 100644 --- a/include/lc/pool.h +++ b/include/lc/pool.h @@ -90,29 +90,29 @@ LC_INLINE int32_t lc_pool_get_local_id(struct lc_pool* pool) return pid; } -LC_INLINE int32_t lc_pool_get_steal_id(struct lc_pool* pool, int32_t pid) +LC_INLINE int32_t lc_pool_get_steal_id(int32_t npools, int32_t pid) { - int32_t npools = pool->npools; if (npools == 1) - return pid; /* if only one pool, no one else to steal from */ + return -1; /* if only one pool, no one else to steal from */ int32_t r = rand() % (npools - 1); return (r + pid + 1) % npools; } -LC_INLINE void* lc_pool_steal(struct lc_pool* pool, int32_t pid) +LC_INLINE void* lc_pool_steal_from(struct lc_pool* pool, int32_t pid) { void* elm = NULL; - int32_t target = lc_pool_get_steal_id(pool, pid); - if (target != pid && likely(pool->lpools[target] != NULL)) - elm = dq_pop_bot(pool->lpools[target]); + if (likely(pool->lpools[pid] != NULL)) + elm = dq_pop_bot(pool->lpools[pid]); return elm; } -LC_INLINE void lc_pool_put(struct lc_pool* pool, void* elm) +LC_INLINE void* lc_pool_steal(struct lc_pool* pool, int32_t pid) { - int32_t pid = lc_pool_get_local_id(pool); - struct dequeue* lpool = pool->lpools[pid]; - dq_push_top(lpool, elm); + void* elm = NULL; + int32_t target = lc_pool_get_steal_id(pool->npools, pid); + if (target != -1) + elm = lc_pool_steal_from(pool, pid); + return elm; } LC_INLINE void lc_pool_put_to(struct lc_pool* pool, void* elm, int32_t pid) @@ -121,6 +121,12 @@ LC_INLINE void lc_pool_put_to(struct lc_pool* pool, void* elm, int32_t pid) dq_push_top(lpool, elm); } +LC_INLINE void lc_pool_put(struct lc_pool* pool, void* elm) +{ + int32_t pid = lc_pool_get_local_id(pool); + lc_pool_put_to(pool, elm, pid); +} + LC_INLINE void* lc_pool_get_nb(struct lc_pool* pool) { int32_t pid = lc_pool_get_local_id(pool); @@ -136,12 +142,12 @@ LC_INLINE void* lc_pool_get(struct lc_pool* pool) int32_t pid = lc_pool_get_local_id(pool); struct dequeue* lpool = pool->lpools[pid]; void* elm = NULL; - while (elm == NULL) { + do { /* must try self every iteration since we never steal from self */ elm = dq_pop_top(lpool); if (elm == NULL) elm = lc_pool_steal(pool, pid); - } + } while (elm == NULL); return elm; } From 5d4bbe2d64cdd97c21c46842dc1934f3fcab4148 Mon Sep 17 00:00:00 2001 From: Omri Mor Date: Wed, 10 Jun 2020 22:22:34 -0500 Subject: [PATCH 7/7] pool: stop stealing from yourself, it doesn't work --- include/lc/pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/lc/pool.h b/include/lc/pool.h index 04eccda7..f9110f79 100644 --- a/include/lc/pool.h +++ b/include/lc/pool.h @@ -111,7 +111,7 @@ LC_INLINE void* lc_pool_steal(struct lc_pool* pool, int32_t pid) void* elm = NULL; int32_t target = lc_pool_get_steal_id(pool->npools, pid); if (target != -1) - elm = lc_pool_steal_from(pool, pid); + elm = lc_pool_steal_from(pool, target); return elm; }