Skip to content

Commit

Permalink
prov/efa: Disable resource management when user set FI_OPT_EFA_RNR_RETRY
Browse files Browse the repository at this point in the history
If a user is explicitly asking for a retry count that means they
also want to manage the resources themselves and the EFA provider
should disable resource management to prevent implicit retries.

Also remove the unused function.

Signed-off-by: Jessie Yang <[email protected]>
  • Loading branch information
jiaxiyan committed Nov 20, 2024
1 parent b30ce18 commit 3b3b26c
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 23 deletions.
8 changes: 6 additions & 2 deletions fabtests/prov/efa/src/efa_rnr_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,13 @@ int ft_efa_rnr_init_fabric()
}

fprintf(stdout, "Setting RNR retry count to %zu ...\n", rnr_retry);
ret = fi_setopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_RNR_RETRY, &rnr_retry, sizeof(rnr_retry));
/*
* Cannot use setopt FI_OPT_EFA_RNR_RETRY here because it sets FI_RM_DISABLED,
* which conflicts with the queue/resend test that requires FI_RM_ENABLED.
*/
ret = setenv("FI_EFA_RNR_RETRY", "0", 1);
if (ret) {
FT_PRINTERR("fi_setopt", -ret);
FT_PRINTERR("setenv", -ret);
return ret;
}
fprintf(stdout, "RNR retry count has been set to %zu.\n", rnr_retry);
Expand Down
21 changes: 0 additions & 21 deletions prov/efa/src/rdm/efa_rdm_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,27 +263,6 @@ struct efa_domain *efa_rdm_ep_domain(struct efa_rdm_ep *ep)

void efa_rdm_ep_post_internal_rx_pkts(struct efa_rdm_ep *ep);

/**
* @brief return whether this endpoint should write error cq entry for RNR.
*
* For an endpoint to write RNR completion, two conditions must be met:
*
* First, the end point must be able to receive RNR completion from rdma-core,
* which means rnr_etry must be less then EFA_RNR_INFINITE_RETRY.
*
* Second, the app need to request this feature when opening endpoint
* (by setting info->domain_attr->resource_mgmt to FI_RM_DISABLED).
* The setting was saved as efa_rdm_ep->handle_resource_management.
*
* @param[in] ep endpoint
*/
static inline
bool efa_rdm_ep_should_write_rnr_completion(struct efa_rdm_ep *ep)
{
return (efa_env.rnr_retry < EFA_RNR_INFINITE_RETRY) &&
(ep->handle_resource_management == FI_RM_DISABLED);
}

/*
* @brief: check whether we should use p2p for this transaction
*
Expand Down
6 changes: 6 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1697,6 +1697,12 @@ static int efa_rdm_ep_setopt(fid_t fid, int level, int optname,
return -FI_ENOSYS;
}
efa_rdm_ep->base_ep.rnr_retry = *(size_t *)optval;
/*
* If a user is explicitly asking for a retry count that means
* they also want to manage the resources themselves and the EFA
* provider should disable resource management to prevent implicit retries.
*/
efa_rdm_ep->handle_resource_management = FI_RM_DISABLED;
break;
case FI_OPT_FI_HMEM_P2P:
if (optlen != sizeof(int))
Expand Down
25 changes: 25 additions & 0 deletions prov/efa/test/efa_unit_test_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,31 @@ void test_efa_rdm_ep_setopt_shared_memory_permitted(struct efa_resource **state)
assert_null(ep->shm_ep);
}

/* Test resource management is disabled when user sets FI_OPT_EFA_RNR_RETRY */
void test_efa_rdm_ep_setopt_rnr_retry(struct efa_resource **state)
{
struct efa_resource *resource = *state;
struct efa_rdm_ep *ep;
size_t rnr_retry = 5;

resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM);

efa_unit_test_resource_construct_with_hints(
resource, FI_EP_RDM, FI_VERSION(1, 14), resource->hints, false,
true);

assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT,
FI_OPT_EFA_RNR_RETRY, &rnr_retry,
sizeof(rnr_retry)), 0);

assert_int_equal(fi_enable(resource->ep), 0);

ep = container_of(resource->ep, struct efa_rdm_ep,
base_ep.util_ep.ep_fid);

assert_int_equal(ep->handle_resource_management, FI_RM_DISABLED);
}

/**
* @brief Test fi_enable with different optval of fi_setopt for
* FI_OPT_EFA_WRITE_IN_ORDER_ALIGNED_128_BYTES optname.
Expand Down
1 change: 1 addition & 0 deletions prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_getopt_undersized_optlen, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_getopt_oversized_optlen, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_setopt_shared_memory_permitted, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_setopt_rnr_retry, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_good, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_pkt_pool_flags, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand Down
1 change: 1 addition & 0 deletions prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ void test_efa_rdm_ep_send_with_shm_no_copy();
void test_efa_rdm_ep_rma_without_caps();
void test_efa_rdm_ep_atomic_without_caps();
void test_efa_rdm_ep_setopt_shared_memory_permitted();
void test_efa_rdm_ep_setopt_rnr_retry();
void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_good();
void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad();
void test_efa_rdm_ep_user_zcpy_rx_disabled();
Expand Down

0 comments on commit 3b3b26c

Please sign in to comment.