From bcda3d25114adc7f9152bea7ec87f8b90b139538 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 7 Oct 2024 10:35:35 +0200 Subject: [PATCH] prov/lpp: Address coverity scan issues Address coverity scan issues in LPP and lpp_regression fabtest. Signed-off-by: Tadeusz Struk --- fabtests/prov/lpp/src/ipc.c | 10 ++++++--- fabtests/prov/lpp/src/main.c | 34 +++++++++++++++---------------- fabtests/prov/lpp/src/test_util.c | 2 +- prov/lpp/src/lpp_domain.c | 10 +++++---- prov/lpp/src/lpp_klpp.c | 4 ++-- prov/lpp/src/lpp_rx.c | 4 ++-- prov/lpp/src/lpp_stx.c | 4 ++-- prov/lpp/src/lpp_umc.c | 14 +++++++++---- 8 files changed, 47 insertions(+), 35 deletions(-) diff --git a/fabtests/prov/lpp/src/ipc.c b/fabtests/prov/lpp/src/ipc.c index 0ad136ac108..881cdf907a6 100644 --- a/fabtests/prov/lpp/src/ipc.c +++ b/fabtests/prov/lpp/src/ipc.c @@ -48,6 +48,7 @@ #include "error.h" static pthread_t info_server_pt; +static int server_running; static int info_server_port = 9000; static struct rank_info rank_info[MAX_RANK] = { 0 }; @@ -233,7 +234,7 @@ static void *info_server_thread(void *arg) int info_server_sock = setup_server(info_server_port); - while (1) { + while (server_running) { int clientsock = accept(info_server_sock, NULL, NULL); if (clientsock < 0) { error("accept"); @@ -355,7 +356,7 @@ struct rank_info *exchange_rank_info(struct rank_info *ri) if (ret) { ERROR(ri, "recv (our peer likely failed)"); } - + return pri; } @@ -471,6 +472,9 @@ void server_init(const char *peerhostname, int server_port) if (server_port > 0) { info_server_port = server_port; } + server_running = 1; get_peer_addrinfo(peerhostname); - pthread_create(&info_server_pt, NULL, info_server_thread, NULL); + if (pthread_create(&info_server_pt, NULL, info_server_thread, NULL)) { + errorx("Failed to create server thread errno: %d\n", errno); + } } diff --git a/fabtests/prov/lpp/src/main.c b/fabtests/prov/lpp/src/main.c index f27cce46234..fa099377a7d 100644 --- a/fabtests/prov/lpp/src/main.c +++ b/fabtests/prov/lpp/src/main.c @@ -57,11 +57,6 @@ #include "test.h" #include "test_util.h" -#define NUM_TESTS (sizeof(testlist) / sizeof(testlist[0])) -#define NUM_CUDA_TESTS (sizeof(cuda_testlist) / sizeof(cuda_testlist[0])) -#define NUM_ROCM_TESTS (sizeof(rocm_testlist) / sizeof(rocm_testlist[0])) -#define TOTAL_TESTS (NUM_TESTS + NUM_CUDA_TESTS + NUM_ROCM_TESTS) - static char myhostname[128]; static const char *peerhostname; static int peer_node; @@ -139,7 +134,12 @@ static const struct test rocm_testlist[] = { #endif }; -static const struct test *filtered_testlist[TOTAL_TESTS]; +static const int num_tests = (sizeof(testlist) / sizeof(testlist[0])); +static const int num_cuda_tests = (sizeof(cuda_testlist) / sizeof(cuda_testlist[0])); +static const int num_rocm_tests = (sizeof(rocm_testlist) / sizeof(rocm_testlist[0])); +static const int total_tests = ((num_tests + num_cuda_tests + num_rocm_tests)); + +static struct test *filtered_testlist[60]; static int n_include_tests; static atomic_int next_test; @@ -274,7 +274,7 @@ static void *worker_thread(void *arg) } static inline void populate_filtered_testlist(const struct test* tlist, - size_t num_tests) + size_t num_tests) { for (int i = 0; i < num_tests; i++) { if (test_filtered(tlist[i].name)) { @@ -284,7 +284,7 @@ static inline void populate_filtered_testlist(const struct test* tlist, } n_exclude_tests++; } else { - filtered_testlist[n_include_tests] = &tlist[i]; + filtered_testlist[n_include_tests] = (struct test*)&tlist[i]; n_include_tests++; } } @@ -296,16 +296,16 @@ static void run_tests(int parallel) n_include_tests = 0; n_exclude_tests = 0; - populate_filtered_testlist(testlist, NUM_TESTS); + populate_filtered_testlist(testlist, num_tests); if(run_cuda_tests) - populate_filtered_testlist(cuda_testlist, NUM_CUDA_TESTS); - else if (NUM_CUDA_TESTS > 0) + populate_filtered_testlist(cuda_testlist, num_cuda_tests); + else if (num_cuda_tests > 0) debug("skipping Cuda tests\n"); if(run_rocm_tests) - populate_filtered_testlist(rocm_testlist, NUM_ROCM_TESTS); - else if (NUM_ROCM_TESTS > 0) + populate_filtered_testlist(rocm_testlist, num_rocm_tests); + else if (num_rocm_tests > 0) debug("skipping Cuda tests\n"); if (n_include_tests == 0) { @@ -354,13 +354,13 @@ static void run_tests(int parallel) printf("============================================\n"); printf(" S U C C E S S\n"); printf("============================================\n"); - printf("%d/%ld tests done, %d filtered, %d iterations each, parallelism of %d at a time \n", - n_include_tests, TOTAL_TESTS, n_exclude_tests, iterations, + printf("%d/%d tests done, %d filtered, %d iterations each, parallelism of %d at a time \n", + n_include_tests, total_tests, n_exclude_tests, iterations, nthreads); if (!run_cuda_tests) - printf("skipped %lu cuda tests\n", NUM_CUDA_TESTS); + printf("skipped %d cuda tests\n", num_cuda_tests); if (!run_rocm_tests) - printf("skipped %lu rocm tests\n", NUM_ROCM_TESTS); + printf("skipped %d rocm tests\n", num_rocm_tests); } void usage() diff --git a/fabtests/prov/lpp/src/test_util.c b/fabtests/prov/lpp/src/test_util.c index 7757899bb82..a1fbcdd9545 100644 --- a/fabtests/prov/lpp/src/test_util.c +++ b/fabtests/prov/lpp/src/test_util.c @@ -413,7 +413,7 @@ void util_validate_cq_entry(struct rank_info *ri, free(flags_str); free(entry_flags_str); - if (flags & FI_REMOTE_CQ_DATA && tentry->data != data){ + if (flags & FI_REMOTE_CQ_DATA && tentry && tentry->data != data) { ERRORX(ri, "FI_REMOTE_CQ_DATA not properly set, expected: 0x%lx, got 0x%lx", data, tentry->data); diff --git a/prov/lpp/src/lpp_domain.c b/prov/lpp/src/lpp_domain.c index 05b257fd7ca..d441436c865 100644 --- a/prov/lpp/src/lpp_domain.c +++ b/prov/lpp/src/lpp_domain.c @@ -415,7 +415,7 @@ int lpp_fi_domain(struct fid_fabric *fabric_fid, struct fi_info *info, // Allocate space for the domain struct. if (lpp_domainp = calloc(1, sizeof(struct lpp_domain)), lpp_domainp == NULL) { FI_WARN(&lpp_prov, FI_LOG_DOMAIN, "failed to alloc domain\n"); - return -FI_ENODATA; + return -FI_ENOMEM; } fd = klpp_open(dev_index); @@ -427,14 +427,16 @@ int lpp_fi_domain(struct fid_fabric *fabric_fid, struct fi_info *info, if (klpp_getdevice(lpp_domainp->fd, &lpp_domainp->devinfo) != 0) { FI_WARN(&lpp_prov, FI_LOG_DOMAIN, "failed to get KLPP device\n"); - return -FI_ENODATA; + status = -FI_ENODATA; + goto err; } // Verify the domain attributes. if ((info != NULL) && (info->domain_attr != NULL)) { if (lpp_domain_verify_attrs(&lpp_domainp->devinfo, - info->domain_attr, OFI_VERSION_DEF_PROV) != 0) { - return -FI_ENODATA; + info->domain_attr, OFI_VERSION_DEF_PROV) != 0) { + status = -FI_ENODATA; + goto err; } lpp_domain_attrs_pop_unspec(NULL, info->domain_attr); diff --git a/prov/lpp/src/lpp_klpp.c b/prov/lpp/src/lpp_klpp.c index 8fa7e342cf3..ad3dfec9560 100644 --- a/prov/lpp/src/lpp_klpp.c +++ b/prov/lpp/src/lpp_klpp.c @@ -93,8 +93,8 @@ int klpp_getdevice(int klpp_fd, struct klppioc_lf *klpp_devinfo) int klpp_av_resolve(struct lpp_av *lpp_avp, uint32_t host_index, uint16_t port, struct lpp_fi_addr *lpp_fi_addr) { - uint64_t status = -FI_EINVAL; - struct klppioc_av klppioc = { 0 }; + int status = -FI_EINVAL; + struct klppioc_av klppioc = { 0 }; klppioc.host_index = host_index; klppioc.port = port; diff --git a/prov/lpp/src/lpp_rx.c b/prov/lpp/src/lpp_rx.c index 7066d0b99d9..4eb71694846 100644 --- a/prov/lpp/src/lpp_rx.c +++ b/prov/lpp/src/lpp_rx.c @@ -375,7 +375,7 @@ static void rx_op_comp_spool_cb(struct lpp_ep *lpp_epp, void *data, int status) static void rx_comp(struct lpp_ep *lpp_epp, struct klpp_msg_hdr *hdr, struct lpp_rx_op *rx_op, int status) { - struct klpp_msg_hdr cmpl_hdr; + struct klpp_msg_hdr cmpl_hdr = { 0 }; struct lpp_rx_op *rx_op_heap; int ret; @@ -470,7 +470,7 @@ static void recv_eager(struct lpp_ep *lpp_epp, struct lpp_rx_entry *rx_entry, static void recv_rdzv(struct lpp_ep *lpp_epp, struct lpp_rx_entry *rx_entry, struct klpp_msg_hdr *hdr, struct lpp_rx_op *rx_op) { - struct klpp_umc_u2k u2k; + struct klpp_umc_u2k u2k = { 0 }; int ret; if (!(rx_entry->lpp_flags & LPP_RX_ENTRY_MR)) { diff --git a/prov/lpp/src/lpp_stx.c b/prov/lpp/src/lpp_stx.c index bfbeae73e5a..c7591ba3d10 100644 --- a/prov/lpp/src/lpp_stx.c +++ b/prov/lpp/src/lpp_stx.c @@ -304,7 +304,7 @@ int lpp_fi_stx_close(struct fid *fid) static void lpp_tx_rdzv_cancel(struct lpp_ep *lpp_epp, struct lpp_tx_entry *tx_entry) { struct lpp_stx *lpp_stxp = lpp_epp->stx; - struct klpp_umc_u2k u2k; + struct klpp_umc_u2k u2k = { 0 }; u2k.type = KLPP_U2K_RDZV_SEND_CANCEL; u2k.rdzv_send.token = lpp_tx_entry_to_token(lpp_stxp, tx_entry); @@ -719,7 +719,7 @@ ssize_t lpp_send_common(struct lpp_ep *lpp_epp, struct lpp_tx_entry *tx_entry) { struct lpp_stx *lpp_stxp = lpp_epp->stx; struct lpp_cq *lpp_cqp = lpp_epp->cq_transmit; - struct klpp_umc_u2k u2k; + struct klpp_umc_u2k u2k = { 0 }; size_t gpu_cnt; ssize_t ret; diff --git a/prov/lpp/src/lpp_umc.c b/prov/lpp/src/lpp_umc.c index c87f1bfee92..03e56b7a33b 100644 --- a/prov/lpp/src/lpp_umc.c +++ b/prov/lpp/src/lpp_umc.c @@ -255,6 +255,7 @@ __lpp_rumc_get_insert(struct lpp_ep *lpp_epp, int node_id, int umc_id, bool inse struct klpp_mc_params *mc_params = &lpp_epp->domain->devinfo.mc_params; struct lpp_umc *umc = lpp_epp->umc; struct lpp_umc_remote *rumc_arr; + int ret; rumc_arr = ofi_idm_lookup(&umc->rumc_idm, node_id); if (OFI_UNLIKELY(rumc_arr == NULL && insert)) { @@ -275,7 +276,12 @@ __lpp_rumc_get_insert(struct lpp_ep *lpp_epp, int node_id, int umc_id, bool inse rumc->last_rx_time = 0; rumc->flags = 0; } - ofi_idm_set(&umc->rumc_idm, node_id, rumc_arr); + ret = ofi_idm_set(&umc->rumc_idm, node_id, rumc_arr); + if (ret == -1) { + FI_WARN(&lpp_prov, FI_LOG_EP_CTRL, "ofi_idm_set failed\n"); + free(rumc_arr); + return NULL; + } } return rumc_arr ? &rumc_arr[umc_id] : NULL; } @@ -583,7 +589,7 @@ int lpp_umc_tx_cmpl(struct lpp_ep *lpp_epp, struct lpp_fi_addr addr, struct lpp_spooled_msg_dqp *dmsg; struct lpp_spooled_msg_srq *smsg; struct klpp_ioc_kmc_send *ioc; - struct iovec iov; + struct iovec iov = { 0 }; size_t iov_count; int ret; @@ -813,11 +819,11 @@ void lpp_umc_dqp_teardown(struct lpp_ep *lpp_epp, struct klpp_umc_k2u *k2u) { struct lpp_umc *umc = lpp_epp->umc; struct lpp_umc_remote *rumc; - struct klpp_umc_u2k u2k; + struct klpp_umc_u2k u2k = { 0 }; struct lpp_umc_dqp *dqp; rumc = lpp_rumc_get(lpp_epp, k2u->dqp.remote_node_id, - k2u->dqp.remote_umc_id); + k2u->dqp.remote_umc_id); dqp = &umc->dqps[k2u->dqp.local_dqp_id]; /* Ensure all remaining messages have been scooped up before we trash