From dbae8c08b0bed1d14ff1b5fe1bc5332b0c175cf8 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Sat, 2 Nov 2024 01:58:15 +0800
Subject: [PATCH] [fea] Expose the arena mr to the Python interface. (#1711)

Close https://github.com/rapidsai/rmm/issues/830 .

- Add the arena allocator to the public Python interface.
- Small changes to the logger initialization to avoid exposing spdlog in the shared objects.

Authors:
  - Jiaming Yuan (https://github.com/trivialfis)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/rmm/pull/1711
---
 .../rmm/mr/device/arena_memory_resource.hpp   | 10 ++++-
 include/rmm/mr/device/detail/arena.hpp        |  2 +-
 python/rmm/rmm/_lib/memory_resource.pxd       |  1 +
 python/rmm/rmm/_lib/memory_resource.py        |  1 +
 python/rmm/rmm/librmm/memory_resource.pxd     |  9 ++++
 python/rmm/rmm/mr.py                          |  2 +
 python/rmm/rmm/pylibrmm/memory_resource.pxd   |  3 ++
 python/rmm/rmm/pylibrmm/memory_resource.pyx   | 43 +++++++++++++++++++
 python/rmm/rmm/tests/test_rmm.py              | 22 ++++++++++
 9 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp
index 417b7d2b4..9b380ffb9 100644
--- a/include/rmm/mr/device/arena_memory_resource.hpp
+++ b/include/rmm/mr/device/arena_memory_resource.hpp
@@ -97,7 +97,10 @@ class arena_memory_resource final : public device_memory_resource {
     : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure}
   {
     if (dump_log_on_failure_) {
-      logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log");
+      logger_ =
+        std::make_shared<spdlog::logger>("arena_memory_dump",
+                                         std::make_shared<spdlog::sinks::basic_file_sink_mt>(
+                                           "rmm_arena_memory_dump.log", true /*truncate file*/));
       // Set the level to `debug` for more detailed output.
       logger_->set_level(spdlog::level::info);
     }
@@ -120,7 +123,10 @@ class arena_memory_resource final : public device_memory_resource {
       dump_log_on_failure_{dump_log_on_failure}
   {
     if (dump_log_on_failure_) {
-      logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log");
+      logger_ =
+        std::make_shared<spdlog::logger>("arena_memory_dump",
+                                         std::make_shared<spdlog::sinks::basic_file_sink_mt>(
+                                           "rmm_arena_memory_dump.log", true /*truncate file*/));
       // Set the level to `debug` for more detailed output.
       logger_->set_level(spdlog::level::info);
     }
diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp
index 6f8303c83..da64ca85b 100644
--- a/include/rmm/mr/device/detail/arena.hpp
+++ b/include/rmm/mr/device/detail/arena.hpp
@@ -647,7 +647,7 @@ class global_arena final {
    *
    * @param logger the spdlog logger to use
    */
-  void dump_memory_log(std::shared_ptr<spdlog::logger> const& logger) const
+  RMM_HIDDEN void dump_memory_log(std::shared_ptr<spdlog::logger> const& logger) const
   {
     std::lock_guard lock(mtx_);
 
diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd
index 983063914..0d11001a4 100644
--- a/python/rmm/rmm/_lib/memory_resource.pxd
+++ b/python/rmm/rmm/_lib/memory_resource.pxd
@@ -40,6 +40,7 @@ from rmm.librmm.memory_resource cimport (
     translate_python_except_to_cpp,
 )
 from rmm.pylibrmm.memory_resource cimport (
+    ArenaMemoryResource,
     BinningMemoryResource,
     CallbackMemoryResource,
     CudaAsyncMemoryResource,
diff --git a/python/rmm/rmm/_lib/memory_resource.py b/python/rmm/rmm/_lib/memory_resource.py
index 0d47e8c9b..f3a24f635 100644
--- a/python/rmm/rmm/_lib/memory_resource.py
+++ b/python/rmm/rmm/_lib/memory_resource.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from rmm.pylibrmm.memory_resource import (  # noqa: F401
+    ArenaMemoryResource,
     BinningMemoryResource,
     CallbackMemoryResource,
     CudaAsyncMemoryResource,
diff --git a/python/rmm/rmm/librmm/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd
index 9ddaf04b9..9e7b70c4f 100644
--- a/python/rmm/rmm/librmm/memory_resource.pxd
+++ b/python/rmm/rmm/librmm/memory_resource.pxd
@@ -130,6 +130,15 @@ cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \
             optional[size_t] maximum_pool_size) except +
         size_t pool_size()
 
+cdef extern from "rmm/mr/device/arena_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass arena_memory_resource[Upstream](device_memory_resource):
+        arena_memory_resource(
+            Upstream* upstream_mr,
+            optional[size_t] arena_size,  # may be an empty optional
+            bool dump_log_on_failure
+        ) except +  # C++ exceptions are translated to Python exceptions
+
 cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \
         namespace "rmm::mr" nogil:
     cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource):
diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py
index 3f0c3fce3..82729271f 100644
--- a/python/rmm/rmm/mr.py
+++ b/python/rmm/rmm/mr.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from rmm.pylibrmm.memory_resource import (
+    ArenaMemoryResource,
     BinningMemoryResource,
     CallbackMemoryResource,
     CudaAsyncMemoryResource,
@@ -45,6 +46,7 @@
 )
 
 __all__ = [
+    "ArenaMemoryResource",
     "BinningMemoryResource",
     "CallbackMemoryResource",
     "CudaAsyncMemoryResource",
diff --git a/python/rmm/rmm/pylibrmm/memory_resource.pxd b/python/rmm/rmm/pylibrmm/memory_resource.pxd
index 985d5d31b..d1e5610db 100644
--- a/python/rmm/rmm/pylibrmm/memory_resource.pxd
+++ b/python/rmm/rmm/pylibrmm/memory_resource.pxd
@@ -26,6 +26,9 @@ cdef class UpstreamResourceAdaptor(DeviceMemoryResource):
 
     cpdef DeviceMemoryResource get_upstream(self)
 
+cdef class ArenaMemoryResource(UpstreamResourceAdaptor):
+    pass  # declares nothing beyond what UpstreamResourceAdaptor provides
+
 cdef class CudaMemoryResource(DeviceMemoryResource):
     pass
 
diff --git a/python/rmm/rmm/pylibrmm/memory_resource.pyx b/python/rmm/rmm/pylibrmm/memory_resource.pyx
index 021125567..b41890fca 100644
--- a/python/rmm/rmm/pylibrmm/memory_resource.pyx
+++ b/python/rmm/rmm/pylibrmm/memory_resource.pyx
@@ -49,6 +49,7 @@ from rmm.librmm.memory_resource cimport (
     CppExcept,
     allocate_callback_t,
     allocation_handle_type,
+    arena_memory_resource,
     available_device_memory as c_available_device_memory,
     binning_memory_resource,
     callback_memory_resource,
@@ -310,6 +311,48 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor):
         )
         return c_mr.pool_size()
 
+cdef class ArenaMemoryResource(UpstreamResourceAdaptor):
+    def __cinit__(
+        self, DeviceMemoryResource upstream_mr,
+        arena_size=None,
+        dump_log_on_failure=False
+    ):
+        cdef optional[size_t] c_arena_size = (
+            optional[size_t]() if
+            arena_size is None
+            else optional[size_t](<size_t> parse_bytes(arena_size))
+        )
+        self.c_obj.reset(
+            new arena_memory_resource[device_memory_resource](
+                upstream_mr.get_mr(),
+                c_arena_size,
+                dump_log_on_failure,
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        object arena_size=None,
+        bool dump_log_on_failure=False
+    ):
+        """
+        A suballocator that emphasizes fragmentation avoidance and scalable concurrency
+        support.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The DeviceMemoryResource from which to allocate memory for arenas.
+        arena_size : int | str, optional
+            Size in bytes of the global arena; parsed with `parse_bytes`. Defaults to
+            half of the available memory on the current device.
+        dump_log_on_failure : bool, optional
+            Whether to dump the arena state to a log file on allocation failure.
+        """
+        pass
+
+
 cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor):
     def __cinit__(
             self,
diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py
index 9872ba89d..b52ea0179 100644
--- a/python/rmm/rmm/tests/test_rmm.py
+++ b/python/rmm/rmm/tests/test_rmm.py
@@ -505,6 +505,28 @@ def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr):
     array_tester(dtype, nelem, alloc)
 
 
+@pytest.mark.parametrize("dtype", _dtypes)
+@pytest.mark.parametrize("nelem", _nelems)
+@pytest.mark.parametrize("alloc", _allocs)
+@pytest.mark.parametrize(
+    "upstream_mr",
+    [
+        lambda: rmm.mr.CudaMemoryResource(),
+        lambda: rmm.mr.ManagedMemoryResource(),
+        lambda: rmm.mr.PoolMemoryResource(
+            rmm.mr.CudaMemoryResource(), 1 << 20
+        ),
+    ],
+)
+def test_arena_memory_resource(dtype, nelem, alloc, upstream_mr):
+    upstream = upstream_mr()  # factories: upstream is built inside the test
+    mr = rmm.mr.ArenaMemoryResource(upstream)  # default size, no log dump
+
+    rmm.mr.set_current_device_resource(mr)
+    assert rmm.mr.get_current_device_resource_type() is type(mr)
+    array_tester(dtype, nelem, alloc)  # runs with the arena installed
+
+
 def test_reinitialize_max_pool_size():
     rmm.reinitialize(
         pool_allocator=True, initial_pool_size=0, maximum_pool_size="8MiB"