From 38adf06ccdcc385258ffdb0c1d7d498dee26a901 Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Mon, 9 Dec 2024 23:48:59 +0000 Subject: [PATCH] prov/efa: fix efa multi recv setopt segfault efa uses the util srx and sets the minimum multi receive size through the srx. However, the srx code doesn't get initialized until the endpoint gets enabled. So if the application calls setopt (before FI_ENABLE), this will segfault because the srx has not been initialized. Instead, we need to save the multi recv size in the efa endpoint to be valid during setopt and then pass that into the util_srx creation to set the multi recv size Signed-off-by: Alexia Ingerson Signed-off-by: Shi Jin --- prov/efa/src/rdm/efa_rdm_ep_fiops.c | 3 --- prov/efa/test/efa_unit_test_srx.c | 12 +++++------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 98e1d0b4375..47b3f53f9bd 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -1650,7 +1650,6 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, { struct efa_rdm_ep *efa_rdm_ep; int intval, ret; - struct util_srx_ctx *srx; efa_rdm_ep = container_of(fid, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid); @@ -1663,8 +1662,6 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname, return -FI_EINVAL; efa_rdm_ep->min_multi_recv_size = *(size_t *)optval; - srx = util_get_peer_srx(efa_rdm_ep->peer_srx_ep)->ep_fid.fid.context; - srx->min_multi_recv_size = *(size_t *)optval; break; case FI_OPT_EFA_RNR_RETRY: if (optlen != sizeof(size_t)) diff --git a/prov/efa/test/efa_unit_test_srx.c b/prov/efa/test/efa_unit_test_srx.c index 733faa67d57..e0bff95169b 100644 --- a/prov/efa/test/efa_unit_test_srx.c +++ b/prov/efa/test/efa_unit_test_srx.c @@ -18,21 +18,19 @@ void test_efa_srx_min_multi_recv_size(struct efa_resource **state) struct util_srx_ctx *srx_ctx; size_t min_multi_recv_size_new; - efa_unit_test_resource_construct(resource, FI_EP_RDM); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); - srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep); - /* - * After ep is enabled, the srx->min_multi_recv_size should be - * exactly the same with ep->min_multi_recv_size - */ - assert_true(efa_rdm_ep->min_multi_recv_size == srx_ctx->min_multi_recv_size); /* Set a new min_multi_recv_size via setopt*/ min_multi_recv_size_new = 1024; assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, &min_multi_recv_size_new, sizeof(min_multi_recv_size_new)), 0); + /* Enable EP */ + assert_int_equal(fi_enable(resource->ep), FI_SUCCESS); + /* Check whether srx->min_multi_recv_size is set correctly */ + srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep); assert_true(srx_ctx->min_multi_recv_size == min_multi_recv_size_new); }