Skip to content

Commit

Permalink
prov/efa: Add missing locks in efa_cntr_wait.
Browse files Browse the repository at this point in the history
Currently, efa_cntr_wait call cntr->progress, which
finally call efa_ep_progress_internal(). However,
efa_ep_progress_internal() must be called inside the
srx->lock. This patch fixes this issue.

It also adds a check to handle the case where
srx_ctx could be NULL;

Signed-off-by: Shi Jin <[email protected]>
  • Loading branch information
shijin-aws committed Sep 12, 2023
1 parent 2d9f5bd commit 4ce0b99
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 12 deletions.
48 changes: 36 additions & 12 deletions prov/efa/src/efa_cntr.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ static int efa_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int time
int numtry = 5;
int tryid = 0;
int waitim = 1;
struct util_srx_ctx *srx_ctx;

srx_ctx = efa_cntr_get_srx_ctx(cntr_fid);

if (srx_ctx)
ofi_genlock_lock(srx_ctx->lock);

cntr = container_of(cntr_fid, struct util_cntr, cntr_fid);
assert(cntr->wait);
Expand All @@ -52,16 +58,22 @@ static int efa_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int time

for (tryid = 0; tryid < numtry; ++tryid) {
cntr->progress(cntr);
if (threshold <= ofi_atomic_get64(&cntr->cnt))
return FI_SUCCESS;
if (threshold <= ofi_atomic_get64(&cntr->cnt)) {
ret = FI_SUCCESS;
goto unlock;
}

if (errcnt != ofi_atomic_get64(&cntr->err))
return -FI_EAVAIL;
if (errcnt != ofi_atomic_get64(&cntr->err)) {
ret = -FI_EAVAIL;
goto unlock;
}

if (timeout >= 0) {
timeout -= (int)(ofi_gettime_ms() - start);
if (timeout <= 0)
return -FI_ETIMEDOUT;
if (timeout <= 0) {
ret = -FI_ETIMEDOUT;
goto unlock;
}
}

ret = fi_wait(&cntr->wait->wait_fid, waitim);
Expand All @@ -71,6 +83,9 @@ static int efa_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int time
waitim *= 2;
}

unlock:
if (srx_ctx)
ofi_genlock_unlock(srx_ctx->lock);
return ret;
}

Expand All @@ -81,13 +96,18 @@ static uint64_t efa_cntr_read(struct fid_cntr *cntr_fid)
uint64_t ret;

efa_cntr = container_of(cntr_fid, struct efa_cntr, util_cntr.cntr_fid);
srx_ctx = efa_cntr->util_cntr.domain->srx->ep_fid.fid.context;

ofi_genlock_lock(srx_ctx->lock);
srx_ctx = efa_cntr_get_srx_ctx(cntr_fid);

if (srx_ctx)
ofi_genlock_lock(srx_ctx->lock);

if (efa_cntr->shm_cntr)
fi_cntr_read(efa_cntr->shm_cntr);
ret = ofi_cntr_read(cntr_fid);
ofi_genlock_unlock(srx_ctx->lock);

if (srx_ctx)
ofi_genlock_unlock(srx_ctx->lock);

return ret;
}
Expand All @@ -99,13 +119,17 @@ static uint64_t efa_cntr_readerr(struct fid_cntr *cntr_fid)
uint64_t ret;

efa_cntr = container_of(cntr_fid, struct efa_cntr, util_cntr.cntr_fid);
srx_ctx = efa_cntr->util_cntr.domain->srx->ep_fid.fid.context;

ofi_genlock_lock(srx_ctx->lock);
srx_ctx = efa_cntr_get_srx_ctx(cntr_fid);

if (srx_ctx)
ofi_genlock_lock(srx_ctx->lock);
if (efa_cntr->shm_cntr)
fi_cntr_read(efa_cntr->shm_cntr);
ret = ofi_cntr_readerr(cntr_fid);
ofi_genlock_unlock(srx_ctx->lock);

if (srx_ctx)
ofi_genlock_unlock(srx_ctx->lock);

return ret;
}
Expand Down
15 changes: 15 additions & 0 deletions prov/efa/src/efa_cntr.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,20 @@ void efa_cntr_report_rx_completion(struct util_ep *ep, uint64_t flags);

void efa_cntr_report_error(struct util_ep *ep, uint64_t flags);

static inline
void *efa_cntr_get_srx_ctx(struct fid_cntr *cntr_fid)
{
struct efa_cntr *efa_cntr;
struct fid_peer_srx *srx = NULL;

efa_cntr = container_of(cntr_fid, struct efa_cntr, util_cntr.cntr_fid);

srx = efa_cntr->util_cntr.domain->srx;
if (!srx)
return NULL;

return srx->ep_fid.fid.context;
}

#endif

0 comments on commit 4ce0b99

Please sign in to comment.