Remove memory access flags from cuda_async_memory_resource (#1754)

Closes #1753 It is a follow up from #1743 I would like for rapidsai/cudf#17553 to merge first, that way I don't break the build. I've learned that I was using `cudaMemPoolSetAccess` incorrectly. This API should only be used from a `peer` device, not from the device that created the pool. This is the reason why calling `cudaMemPoolSetAccess` with none throws an error as documented here #1753. I have tested that I can still export the fabric handles and import them using UCX in a peer device with the default access that pool owner device gets (read+write is the default). Note that this read+write default access cannot be revoked from the owner, as it wouldn't make sense to have memory that nobody has access to, but peers can call `cudaMemPoolSetAccess` to gain read+write access or to stop accessing (none) a peer's pool memory. Authors: - Alessandro Bellina (https://github.com/abellina) Approvers: - Bradley Dice (https://github.com/bdice) URL: #1754
rapidsai · Dec 9, 2024 · 8d41610 · 8d41610
1 parent ff59ea4
commit 8d41610
Show file tree

Hide file tree

Showing 2 changed files with 1 addition and 38 deletions.
diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp
@@ -65,21 +65,6 @@ class cuda_async_memory_resource final : public device_memory_resource {
     fabric    = 0x8   ///< Allows a fabric handle to be used for exporting. (cudaMemFabricHandle_t)
   };
 
-  /**
-   * @brief Flags for specifying the memory pool accessibility from other devices.
-   *
-   * @note These values are exact copies from `cudaMemAccessFlags`. See the `cudaMemAccessFlags`
-   * docs at https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html and ensure the
-   * enum values are kept in sync with the CUDA documentation. The default, `none`, marks the pool's
-   * memory as private to the device in which it was created. `read_write` should only be used if
-   * memory sharing among devices is required. Note that there is a `cudaMemAccessFlagsProtRead`
-   * documented, but memory pools don't support read-only access, so it has been omitted.
-   */
-  enum class access_flags {
-    none       = 0,  ///< Default, make pool not accessible.
-    read_write = 3   ///< Make pool read-write accessible.
-  };
-
   /**
    * @brief Constructs a cuda_async_memory_resource with the optionally specified initial pool size
    * and release threshold.
@@ -96,14 +81,11 @@ class cuda_async_memory_resource final : public device_memory_resource {
    * @param export_handle_type Optional `cudaMemAllocationHandleType` that allocations from this
    * resource should support interprocess communication (IPC). Default is `cudaMemHandleTypeNone`
    * for no IPC support.
-   * @param access_flag Optional `cudaMemAccessFlags` that controls pool memory accessibility
-   * from other devices. Default is `cudaMemAccessFlagsProtNone` for no accessibility.
    */
   // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
   cuda_async_memory_resource(std::optional<std::size_t> initial_pool_size             = {},
                              std::optional<std::size_t> release_threshold             = {},
-                             std::optional<allocation_handle_type> export_handle_type = {},
-                             std::optional<access_flags> access_flag                  = {})
+                             std::optional<allocation_handle_type> export_handle_type = {})
   {
     // Check if cudaMallocAsync Memory pool supported
     RMM_EXPECTS(rmm::detail::runtime_async_alloc::is_supported(),
@@ -135,12 +117,6 @@ class cuda_async_memory_resource final : public device_memory_resource {
         cudaMemPoolSetAttribute(pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled));
     }
 
-    if (access_flag) {
-      cudaMemAccessDesc desc = {.location = pool_props.location,
-                                .flags    = static_cast<cudaMemAccessFlags>(*access_flag)};
-      RMM_CUDA_TRY(cudaMemPoolSetAccess(pool_handle(), &desc, 1));
-    }
-
     auto const [free, total] = rmm::available_device_memory();
 
     // Need an l-value to take address to pass to cudaMemPoolSetAttribute

diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp
@@ -91,18 +91,5 @@ TEST_F(AsyncMRFabricTest, FabricHandlesSupport)
   RMM_CUDA_TRY(cudaDeviceSynchronize());
 }
 
-TEST_F(AsyncMRFabricTest, FabricHandlesSupportReadWriteShareable)
-{
-  const auto pool_init_size{100};
-  const auto pool_release_threshold{1000};
-  cuda_async_mr mr{pool_init_size,
-                   pool_release_threshold,
-                   rmm::mr::cuda_async_memory_resource::allocation_handle_type::fabric,
-                   rmm::mr::cuda_async_memory_resource::access_flags::read_write};
-  void* ptr = mr.allocate(pool_init_size);
-  mr.deallocate(ptr, pool_init_size);
-  RMM_CUDA_TRY(cudaDeviceSynchronize());
-}
-
 }  // namespace
 }  // namespace rmm::test