From dbae8c08b0bed1d14ff1b5fe1bc5332b0c175cf8 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 2 Nov 2024 01:58:15 +0800 Subject: [PATCH] [fea] Expose the arena mr to the Python interface. (#1711) Close https://github.com/rapidsai/rmm/issues/830 . - Add the arena allocator to the public Python interface. - Small changes to the logger initialization to avoid exposing spdlog in the shared objects. Authors: - Jiaming Yuan (https://github.com/trivialfis) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/rmm/pull/1711 --- .../rmm/mr/device/arena_memory_resource.hpp | 10 ++++- include/rmm/mr/device/detail/arena.hpp | 2 +- python/rmm/rmm/_lib/memory_resource.pxd | 1 + python/rmm/rmm/_lib/memory_resource.py | 1 + python/rmm/rmm/librmm/memory_resource.pxd | 9 ++++ python/rmm/rmm/mr.py | 2 + python/rmm/rmm/pylibrmm/memory_resource.pxd | 3 ++ python/rmm/rmm/pylibrmm/memory_resource.pyx | 43 +++++++++++++++++++ python/rmm/rmm/tests/test_rmm.py | 22 ++++++++++ 9 files changed, 90 insertions(+), 3 deletions(-) diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 417b7d2b4..9b380ffb9 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -97,7 +97,10 @@ class arena_memory_resource final : public device_memory_resource { : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} { if (dump_log_on_failure_) { - logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); + logger_ = + std::make_shared<spdlog::logger>("arena_memory_dump", + std::make_shared<spdlog::sinks::basic_file_sink_mt>( + "rmm_arena_memory_dump.log", true /*truncate file*/)); // Set the level to `debug` for more detailed output. 
logger_->set_level(spdlog::level::info); } @@ -120,7 +123,10 @@ class arena_memory_resource final : public device_memory_resource { dump_log_on_failure_{dump_log_on_failure} { if (dump_log_on_failure_) { - logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); + logger_ = + std::make_shared<spdlog::logger>("arena_memory_dump", + std::make_shared<spdlog::sinks::basic_file_sink_mt>( + "rmm_arena_memory_dump.log", true /*truncate file*/)); // Set the level to `debug` for more detailed output. logger_->set_level(spdlog::level::info); } diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 6f8303c83..da64ca85b 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -647,7 +647,7 @@ class global_arena final { * * @param logger the spdlog logger to use */ - void dump_memory_log(std::shared_ptr<spdlog::logger> const& logger) const + RMM_HIDDEN void dump_memory_log(std::shared_ptr<spdlog::logger> const& logger) const { std::lock_guard lock(mtx_); diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 983063914..0d11001a4 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -40,6 +40,7 @@ from rmm.librmm.memory_resource cimport ( translate_python_except_to_cpp, ) from rmm.pylibrmm.memory_resource cimport ( + ArenaMemoryResource, BinningMemoryResource, CallbackMemoryResource, CudaAsyncMemoryResource, diff --git a/python/rmm/rmm/_lib/memory_resource.py b/python/rmm/rmm/_lib/memory_resource.py index 0d47e8c9b..f3a24f635 100644 --- a/python/rmm/rmm/_lib/memory_resource.py +++ b/python/rmm/rmm/_lib/memory_resource.py @@ -13,6 +13,7 @@ # limitations under the License. 
from rmm.pylibrmm.memory_resource import ( # noqa: F401 + ArenaMemoryResource, BinningMemoryResource, CallbackMemoryResource, CudaAsyncMemoryResource, diff --git a/python/rmm/rmm/librmm/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd index 9ddaf04b9..9e7b70c4f 100644 --- a/python/rmm/rmm/librmm/memory_resource.pxd +++ b/python/rmm/rmm/librmm/memory_resource.pxd @@ -130,6 +130,15 @@ cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ optional[size_t] maximum_pool_size) except + size_t pool_size() +cdef extern from "rmm/mr/device/arena_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass arena_memory_resource[Upstream](device_memory_resource): + arena_memory_resource( + Upstream* upstream_mr, + optional[size_t] arena_size, + bool dump_log_on_failure + ) except + + cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource): diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index 3f0c3fce3..82729271f 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from rmm.pylibrmm.memory_resource import ( + ArenaMemoryResource, BinningMemoryResource, CallbackMemoryResource, CudaAsyncMemoryResource, @@ -45,6 +46,7 @@ ) __all__ = [ + "ArenaMemoryResource", "BinningMemoryResource", "CallbackMemoryResource", "CudaAsyncMemoryResource", diff --git a/python/rmm/rmm/pylibrmm/memory_resource.pxd b/python/rmm/rmm/pylibrmm/memory_resource.pxd index 985d5d31b..d1e5610db 100644 --- a/python/rmm/rmm/pylibrmm/memory_resource.pxd +++ b/python/rmm/rmm/pylibrmm/memory_resource.pxd @@ -26,6 +26,9 @@ cdef class UpstreamResourceAdaptor(DeviceMemoryResource): cpdef DeviceMemoryResource get_upstream(self) +cdef class ArenaMemoryResource(UpstreamResourceAdaptor): + pass + cdef class CudaMemoryResource(DeviceMemoryResource): pass diff --git a/python/rmm/rmm/pylibrmm/memory_resource.pyx b/python/rmm/rmm/pylibrmm/memory_resource.pyx index 021125567..b41890fca 100644 --- a/python/rmm/rmm/pylibrmm/memory_resource.pyx +++ b/python/rmm/rmm/pylibrmm/memory_resource.pyx @@ -49,6 +49,7 @@ from rmm.librmm.memory_resource cimport ( CppExcept, allocate_callback_t, allocation_handle_type, + arena_memory_resource, available_device_memory as c_available_device_memory, binning_memory_resource, callback_memory_resource, @@ -310,6 +311,48 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor): ) return c_mr.pool_size() +cdef class ArenaMemoryResource(UpstreamResourceAdaptor): + def __cinit__( + self, DeviceMemoryResource upstream_mr, + arena_size=None, + dump_log_on_failure=False + ): + cdef optional[size_t] c_arena_size = ( + optional[size_t]() if + arena_size is None + else optional[size_t](<size_t> parse_bytes(arena_size)) + ) + self.c_obj.reset( + new arena_memory_resource[device_memory_resource]( + upstream_mr.get_mr(), + c_arena_size, + dump_log_on_failure, + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + object arena_size=None, + bool dump_log_on_failure=False + ): + """ + A suballocator that emphasizes fragmentation avoidance and scalable 
concurrency + support. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The DeviceMemoryResource from which to allocate memory for arenas. + arena_size : int, optional + Size in bytes of the global arena. Defaults to half of the available memory + on the current device. + dump_log_on_failure : bool, optional + Whether to dump the arena on allocation failure. + """ + pass + + cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): def __cinit__( self, diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 9872ba89d..b52ea0179 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -505,6 +505,28 @@ def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr): array_tester(dtype, nelem, alloc) +@pytest.mark.parametrize("dtype", _dtypes) +@pytest.mark.parametrize("nelem", _nelems) +@pytest.mark.parametrize("alloc", _allocs) +@pytest.mark.parametrize( + "upstream_mr", + [ + lambda: rmm.mr.CudaMemoryResource(), + lambda: rmm.mr.ManagedMemoryResource(), + lambda: rmm.mr.PoolMemoryResource( + rmm.mr.CudaMemoryResource(), 1 << 20 + ), + ], +) +def test_arena_memory_resource(dtype, nelem, alloc, upstream_mr): + upstream = upstream_mr() + mr = rmm.mr.ArenaMemoryResource(upstream) + + rmm.mr.set_current_device_resource(mr) + assert rmm.mr.get_current_device_resource_type() is type(mr) + array_tester(dtype, nelem, alloc) + + def test_reinitialize_max_pool_size(): rmm.reinitialize( pool_allocator=True, initial_pool_size=0, maximum_pool_size="8MiB"