diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9d35e3f97..5d1d53670 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -26,5 +26,5 @@ ENV PYTHONDONTWRITEBYTECODE="1" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" -ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV HISTFILE="/home/coder/.cache/._bash_history" diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 3bfef6706..549ffa67b 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 5bfc30823..d6dd7b6ce 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index 925557b22..17e8d5cd0 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.10-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 2f9e1c493..54964d880 100644 --- 
a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda12.5-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9b7efecde..6fa11225e 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -56,7 +56,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -68,7 +68,7 @@ jobs: run_script: "ci/build_docs.sh" wheel-build-cpp: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) build_type: ${{ inputs.build_type || 'branch' }} @@ -79,7 +79,7 @@ jobs: wheel-build-python: needs: wheel-build-cpp secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-publish-cpp: needs: wheel-build-cpp secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -100,7 +100,7 @@ jobs: wheel-publish-python: needs: wheel-build-python secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 1160b93e9..6780298c3 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,51 +12,91 @@ concurrency: jobs: pr-builder: needs: + - changed-files - checks - conda-cpp-build - conda-cpp-tests - conda-python-build - conda-python-tests - docs-build + - telemetry-setup - wheel-build-cpp - wheel-build-python - wheel-tests - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + if: always() + with: + needs: ${{ toJSON(needs) }} + telemetry-setup: + runs-on: ubuntu-latest + continue-on-error: true + env: + OTEL_SERVICE_NAME: "pr-rmm" + steps: + - name: Telemetry setup + uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main + changed-files: + needs: + - telemetry-setup + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + with: + files_yaml: | + test_cpp: + - '**' + - '!.devcontainer/**' + - '!.pre-commit-config.yaml' + - '!CONTRIBUTING.md' + - '!README.md' + - '!img/**' + - '!python/**' + test_python: + - '**' + - '!.devcontainer/**' + - '!.pre-commit-config.yaml' + - '!CONTRIBUTING.md' + - '!README.md' + - '!img/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + needs: + - telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 with: enable_check_generated_files: false + ignored_pr_jobs: telemetry-summarize conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 with: build_type: pull-request conda-cpp-tests: - needs: conda-cpp-build + needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: pull-request conda-python-tests: - needs: conda-python-build + needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -66,7 +106,7 @@ jobs: wheel-build-cpp: needs: checks secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) build_type: pull-request @@ -74,20 +114,23 @@ jobs: wheel-build-python: needs: wheel-build-cpp secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: ci/build_wheel_python.sh wheel-tests: - needs: wheel-build-python + needs: [wheel-build-python, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 + needs: + - telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 with: arch: '["amd64"]' cuda: '["12.5"]' @@ -95,3 +138,18 @@ jobs: sccache -z; build-all -DBUILD_BENCHMARKS=ON --verbose; sccache -s; + + telemetry-summarize: + runs-on: ubuntu-latest + needs: pr-builder + if: always() + continue-on-error: true + steps: + - name: Load stashed telemetry env vars + uses: rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@main + with: + load_service_name: true + - name: Telemetry summarize + uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@main + with: + cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 13838d888..34a0f746d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.gitignore b/.gitignore index 2d0b150e1..df9d920d5 100644 --- a/.gitignore +++ b/.gitignore @@ -22,10 +22,13 @@ rmm.egg-info/ python/build python/*/build python/rmm/docs/_build -python/rmm/**/_lib/**/*.cpp -!python/rmm/_lib/_torch_allocator.cpp -python/rmm/**/_lib/**/*.h -python/rmm/**/_lib/.nfs* +python/rmm/**/librmm/**/*.cpp +!python/rmm/librmm/_torch_allocator.cpp +python/rmm/**/librmm/**/*.h +python/rmm/**/librmm/.nfs* +python/rmm/**/pylibrmm/**/*.cpp +python/rmm/**/pylibrmm/**/*.h +python/rmm/**/pylibrmm/.nfs* 
python/rmm/_cuda/*.cpp python/rmm/tests/*.cpp python/rmm/*.ipynb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f114abec4..56c972b4e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -82,7 +82,7 @@ repos: - id: verify-copyright - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.13.11 + rev: v1.16.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/CHANGELOG.md b/CHANGELOG.md index 1268762b2..817a0c8c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,48 @@ +# rmm 24.12.00 (11 Dec 2024) + +## 🚨 Breaking Changes + +- Deprecate support for directly accessing logger ([#1690](https://github.com/rapidsai/rmm/pull/1690)) [@vyasr](https://github.com/vyasr) + +## 🐛 Bug Fixes + +- Query total memory in failure_callback_resource_adaptor tests ([#1734](https://github.com/rapidsai/rmm/pull/1734)) [@harrism](https://github.com/harrism) +- Treat deprecation warnings as errors and fix deprecation warnings in replay benchmark ([#1728](https://github.com/rapidsai/rmm/pull/1728)) [@harrism](https://github.com/harrism) +- Disallow cuda-python 12.6.1 and 11.8.4 ([#1720](https://github.com/rapidsai/rmm/pull/1720)) [@bdice](https://github.com/bdice) +- Fix typos in .gitignore ([#1697](https://github.com/rapidsai/rmm/pull/1697)) [@charlesbluca](https://github.com/charlesbluca) +- Fix `rmm ._lib` imports ([#1693](https://github.com/rapidsai/rmm/pull/1693)) [@Matt711](https://github.com/Matt711) + +## 📖 Documentation + +- Fix docs warning ([#1706](https://github.com/rapidsai/rmm/pull/1706)) [@bdice](https://github.com/bdice) +- Update cross-link to cuda-python object ([#1699](https://github.com/rapidsai/rmm/pull/1699)) [@wence-](https://github.com/wence-) + +## 🚀 New Features + +- Correct rmm tests for validity of device pointers ([#1714](https://github.com/rapidsai/rmm/pull/1714)) [@robertmaynard](https://github.com/robertmaynard) +- Update rmm tests to use rapids_cmake_support_conda_env ([#1707](https://github.com/rapidsai/rmm/pull/1707)) [@robertmaynard](https://github.com/robertmaynard) +- adding telemetry ([#1692](https://github.com/rapidsai/rmm/pull/1692)) [@msarahan](https://github.com/msarahan) +- Make `cudaMallocAsync` logic non-optional as we require CUDA 11.2+ ([#1667](https://github.com/rapidsai/rmm/pull/1667)) [@robertmaynard](https://github.com/robertmaynard) + +## 🛠️ Improvements + +- enforce wheel size limits, README formatting in CI ([#1726](https://github.com/rapidsai/rmm/pull/1726)) [@jameslamb](https://github.com/jameslamb) +- Remove all explicit usage of fmtlib ([#1724](https://github.com/rapidsai/rmm/pull/1724)) [@harrism](https://github.com/harrism) +- WIP: put a ceiling on cuda-python ([#1723](https://github.com/rapidsai/rmm/pull/1723)) [@jameslamb](https://github.com/jameslamb) +- use rapids-generate-pip-constraints to pin to oldest dependencies in CI ([#1716](https://github.com/rapidsai/rmm/pull/1716)) [@jameslamb](https://github.com/jameslamb) +- Deprecate `rmm._lib` ([#1713](https://github.com/rapidsai/rmm/pull/1713)) [@Matt711](https://github.com/Matt711) +- print sccache stats in builds ([#1712](https://github.com/rapidsai/rmm/pull/1712)) [@jameslamb](https://github.com/jameslamb) +- [fea] Expose the arena mr to the Python interface. 
([#1711](https://github.com/rapidsai/rmm/pull/1711)) [@trivialfis](https://github.com/trivialfis) +- devcontainer: replace `VAULT_HOST` with `AWS_ROLE_ARN` ([#1708](https://github.com/rapidsai/rmm/pull/1708)) [@jjacobelli](https://github.com/jjacobelli) +- make conda installs in CI stricter (part 2) ([#1703](https://github.com/rapidsai/rmm/pull/1703)) [@jameslamb](https://github.com/jameslamb) +- Add BUILD_SHARED_LIBS option defaulting to ON ([#1702](https://github.com/rapidsai/rmm/pull/1702)) [@wence-](https://github.com/wence-) +- make conda installs in CI stricter ([#1696](https://github.com/rapidsai/rmm/pull/1696)) [@jameslamb](https://github.com/jameslamb) +- Prune workflows based on changed files ([#1695](https://github.com/rapidsai/rmm/pull/1695)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Deprecate support for directly accessing logger ([#1690](https://github.com/rapidsai/rmm/pull/1690)) [@vyasr](https://github.com/vyasr) +- Use `rmm::percent_of_free_device_memory` in arena test ([#1689](https://github.com/rapidsai/rmm/pull/1689)) [@wence-](https://github.com/wence-) +- exclude 'gcovr' from list of development pip packages ([#1688](https://github.com/rapidsai/rmm/pull/1688)) [@jameslamb](https://github.com/jameslamb) +- [Improvement] Reorganize Cython to separate C++ bindings and make Cython classes public ([#1676](https://github.com/rapidsai/rmm/pull/1676)) [@Matt711](https://github.com/Matt711) + # rmm 24.10.00 (9 Oct 2024) ## 🚨 Breaking Changes diff --git a/CMakeLists.txt b/CMakeLists.txt index 39d5dccde..07bd368ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,9 @@ rapids_cmake_build_type(Release) option(RMM_NVTX "Build RMM with NVTX support" OFF) option(BUILD_TESTS "Configure CMake to build tests" ON) option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF) +# This is mostly so that dependent libraries are configured in shared mode for downstream dependents +# of RMM that get their common dependencies transitively. 
+option(BUILD_SHARED_LIBS "Build RMM shared libraries" ON)
 set(RMM_LOGGING_LEVEL "INFO" CACHE STRING "Choose the logging level.")
@@ -70,7 +73,6 @@ rapids_find_package(
 # add third party dependencies using CPM
 rapids_cpm_init()
-include(cmake/thirdparty/get_fmt.cmake)
 include(cmake/thirdparty/get_spdlog.cmake)
 include(cmake/thirdparty/get_cccl.cmake)
 include(cmake/thirdparty/get_nvtx.cmake)
@@ -87,13 +89,11 @@ target_include_directories(rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
-target_link_libraries(rmm INTERFACE fmt::fmt-header-only)
 target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
-  target_compile_options(${BENCH_NAME} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-Wall -Werror
-                                              -Wno-error=deprecated-declarations -Wno-unknown-pragmas>)
+  target_compile_options(${BENCH_NAME} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-Wall -Werror
+                                              -Wno-unknown-pragmas>)
   if(DISABLE_DEPRECATION_WARNING)
     target_compile_options(
       ${BENCH_NAME} PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-deprecated-declarations>)
diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
index 86e761c80..b5edbb536 100644
--- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
+++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
@@ -133,9 +133,7 @@ static void benchmark_range(benchmark::internal::Benchmark* bench)
 MRFactoryFunc get_mr_factory(std::string const& resource_name)
 {
   if (resource_name == "cuda") { return &make_cuda; }
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
   if (resource_name == "cuda_async") { return &make_cuda_async; }
-#endif
   if (resource_name == "pool") { return &make_pool; }
   if (resource_name == "arena") { return &make_arena; }
   if (resource_name == "binning") { return &make_binning; }
@@ -153,13 +151,11 @@ void declare_benchmark(std::string const& name)
     return;
   }
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
   if (name == "cuda_async") {
     BENCHMARK_CAPTURE(BM_MultiStreamAllocations, cuda_async, &make_cuda_async)  //
       ->Apply(benchmark_range);
     return;
   }
-#endif
   if (name == "pool") {
     BENCHMARK_CAPTURE(BM_MultiStreamAllocations, pool_mr, &make_pool)  //
@@ -248,9 +244,7 @@ int main(int argc, char** argv)
     resource_names.emplace_back(args["resource"].as<std::string>());
   } else {
     resource_names.emplace_back("cuda");
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
     resource_names.emplace_back("cuda_async");
-#endif
     resource_names.emplace_back("pool");
     resource_names.emplace_back("arena");
     resource_names.emplace_back("binning");
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp
index 57116743b..2971f7e40 100644
--- a/benchmarks/random_allocations/random_allocations.cpp
+++ b/benchmarks/random_allocations/random_allocations.cpp
@@ -316,9 +316,7 @@ int main(int argc, char** argv)
   std::map<std::string, MRFactoryFunc> const funcs({{"arena", &make_arena},
                                                     {"binning", &make_binning},
                                                     {"cuda", &make_cuda},
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
                                                     {"cuda_async", &make_cuda_async},
-#endif
                                                     {"pool", &make_pool}});
   auto resource = args["resource"].as<std::string>();
@@ -340,11 +338,7 @@ int main(int argc, char** argv)
     std::string mr_name = args["resource"].as<std::string>();
     declare_benchmark(mr_name);
   } else {
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
     std::vector<std::string> mrs{"pool", "binning", "arena", "cuda_async", "cuda"};
-#else
-    std::vector<std::string> mrs{"pool", "binning", "arena", "cuda"};
-#endif
     std::for_each(
       std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); });
   }
diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp
index 5afed036a..7f45b7691 100644
--- a/benchmarks/replay/replay.cpp
+++ b/benchmarks/replay/replay.cpp
@@ -16,6 +16,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
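// The replay.cpp hunks below move the benchmark off direct spdlog calls and onto
// RMM's logging macros. A minimal before/after sketch (not part of the diff;
// assumes rmm/logger.hpp is on the include path):
#include <rmm/logger.hpp>

void log_benchmark_boundary()
{
  // 24.10 style: direct access to the spdlog logger, now deprecated:
  //   rmm::logger().log(spdlog::level::info, "------ Start of Benchmark -----");
  // 24.12 style: RMM_LOG_* macros, which format printf-style via
  // rmm::detail::formatted_log() before handing spdlog a plain string:
  RMM_LOG_INFO("------ Start of Benchmark -----");
}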
@@ -172,7 +173,7 @@ struct replay_benchmark {
   void SetUp(const ::benchmark::State& state)
   {
     if (state.thread_index() == 0) {
-      rmm::logger().log(spdlog::level::info, "------ Start of Benchmark -----");
+      RMM_LOG_INFO("------ Start of Benchmark -----");
       mr_ = factory_(simulated_size_);
     }
   }
@@ -181,7 +182,7 @@ struct replay_benchmark {
   void TearDown(const ::benchmark::State& state)
   {
     if (state.thread_index() == 0) {
-      rmm::logger().log(spdlog::level::info, "------ End of Benchmark -----");
+      RMM_LOG_INFO("------ End of Benchmark -----");
       // clean up any leaked allocations
       std::size_t total_leaked{0};
       std::size_t num_leaked{0};
@@ -402,7 +403,7 @@ int main(int argc, char** argv)
   auto const num_threads = per_thread_events.size();
   // Uncomment to enable / change default log level
-  // rmm::logger().set_level(spdlog::level::trace);
+  // rmm::detail::logger().set_level(spdlog::level::trace);
   if (args.count("resource") > 0) {
     std::string mr_name = args["resource"].as<std::string>();
diff --git a/benchmarks/utilities/log_parser.hpp b/benchmarks/utilities/log_parser.hpp
index 2283ace93..4dfa5bae4 100644
--- a/benchmarks/utilities/log_parser.hpp
+++ b/benchmarks/utilities/log_parser.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -151,7 +151,7 @@ inline std::vector<event> parse_csv(std::string const& filename)
   auto parse_pointer = [](std::string const& str, uintptr_t& ptr) {
     auto const base{16};
-    ptr = std::stoll(str, nullptr, base);
+    ptr = (str == "(nil)") ? 0 : std::stoll(str, nullptr, base);
   };
   std::vector<uintptr_t> pointers = csv.GetColumn<uintptr_t>("Pointer", parse_pointer);
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index a601ecaae..9d14cd072 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -15,7 +15,11 @@ rapids-print-env
 rapids-logger "Begin cpp build"
+sccache --zero-stats
+
 # This calls mambabuild when boa is installed (as is the case in the CI images)
 RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild conda/recipes/librmm
+sccache --show-adv-stats
+
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 970417c1d..844dae1c6 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -6,6 +6,9 @@ set -euo pipefail
 rapids-logger "Create test conda environment"
 . 
/opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" +export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-dependency-file-generator \ --output conda \ --file-key docs \ @@ -23,11 +26,9 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - rmm librmm + "rmm=${RAPIDS_VERSION}" \ + "librmm=${RAPIDS_VERSION}" -export RAPIDS_VERSION="$(rapids-version)" -export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" -export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" @@ -44,4 +45,4 @@ mkdir -p "${RAPIDS_DOCS_DIR}/rmm/html" mv _build/dirhtml/* "${RAPIDS_DOCS_DIR}/rmm/html" popd -rapids-upload-docs +RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" rapids-upload-docs diff --git a/ci/build_python.sh b/ci/build_python.sh index fcd2c55e7..7a9df5fc7 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -19,7 +19,11 @@ rapids-logger "Begin py build" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +sccache --zero-stats + # This calls mambabuild when boa is installed (as is the case in the CI images) RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild -c "${CPP_CHANNEL}" conda/recipes/rmm +sccache --show-adv-stats + rapids-upload-conda-to-s3 python diff --git a/ci/build_wheel_cpp.sh b/ci/build_wheel_cpp.sh index 2c5cc0560..1ec979372 100755 --- a/ci/build_wheel_cpp.sh +++ b/ci/build_wheel_cpp.sh @@ -14,7 +14,15 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" cd "${package_dir}" -python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +sccache --zero-stats + +python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check + +sccache --show-adv-stats + python -m pip install wheel python -m wheel tags --platform any dist/* --remove + +../../ci/validate_wheel.sh dist + RAPIDS_PY_WHEEL_NAME="rmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp dist diff --git a/ci/build_wheel_python.sh b/ci/build_wheel_python.sh index 555974b50..4e4d3bf61 100755 --- a/ci/build_wheel_python.sh +++ b/ci/build_wheel_python.sh @@ -22,12 +22,18 @@ CPP_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="rmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-down # are used when created the isolated build environment echo "librmm-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${CPP_WHEELHOUSE}/librmm_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" > ./build-constraints.txt +sccache --zero-stats + PIP_CONSTRAINT="${PWD}/build-constraints.txt" \ - python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check + python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check + +sccache --show-adv-stats mkdir -p final_dist python -m auditwheel repair -w final_dist dist/* +../../ci/validate_wheel.sh final_dist + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist # switch back to the root of the repo and check symbol visibility diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 9ad1c9536..975477a6e 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -8,6 +8,8 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . 
/opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-logger "Generate C++ testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -29,7 +31,8 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - librmm librmm-tests + "librmm=${RAPIDS_VERSION}" \ + "librmm-tests=${RAPIDS_VERSION}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_python.sh b/ci/test_python.sh index 386d0b063..51d0a48c3 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -9,6 +9,8 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ rapids-logger "Create test conda environment" . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-dependency-file-generator \ --output conda \ --file-key test_python \ @@ -28,7 +30,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - rmm librmm + "rmm=${RAPIDS_VERSION}" \ + "librmm=${RAPIDS_VERSION}" RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index d06c4eed0..2f39b197b 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -7,15 +7,8 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" WHEELHOUSE="${PWD}/dist/" RAPIDS_PY_WHEEL_NAME="rmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python "${WHEELHOUSE}" -# Constraint to minimum dependency versions if job is set up as "oldest" -echo "" > ./constraints.txt -if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then - rapids-dependency-file-generator \ - --output requirements \ - --file-key test_python \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ - | tee ./constraints.txt -fi +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints test_python ./constraints.txt # echo to expand wildcard before adding '[extra]' requires for pip python -m pip install \ diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh new file mode 100755 index 000000000..60a80fce6 --- /dev/null +++ b/ci/validate_wheel.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +wheel_dir_relative_path=$1 + +rapids-logger "validate packages with 'pydistcheck'" + +pydistcheck \ + --inspect \ + "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validate packages with 'twine'" + +twine check \ + --strict \ + "$(echo ${wheel_dir_relative_path}/*.whl)" diff --git a/cmake/thirdparty/get_fmt.cmake b/cmake/thirdparty/get_fmt.cmake deleted file mode 100644 index 5787fb73f..000000000 --- a/cmake/thirdparty/get_fmt.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
-# ============================================================================= - -# Use CPM to find or clone fmt -function(find_and_configure_fmt) - - include(${rapids-cmake-dir}/cpm/fmt.cmake) - rapids_cpm_fmt(INSTALL_EXPORT_SET rmm-exports BUILD_EXPORT_SET rmm-exports) -endfunction() - -find_and_configure_fmt() diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index bf64d4d55..519c056b5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,7 +10,7 @@ dependencies: - clang-tools==16.0.6 - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-version=11.8 - cudatoolkit - cxx-compiler diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 112c635a8..86e887c21 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -11,7 +11,7 @@ dependencies: - clang==16.0.6 - cmake>=3.26.4,!=3.30.0 - cuda-nvcc -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-version=12.5 - cxx-compiler - cython>=3.0.0 diff --git a/conda/recipes/rmm/meta.yaml b/conda/recipes/rmm/meta.yaml index fcc7424fa..8f6e13fd7 100644 --- a/conda/recipes/rmm/meta.yaml +++ b/conda/recipes/rmm/meta.yaml @@ -38,6 +38,7 @@ build: - {{ compiler('cuda') }} - cuda-cudart-dev {% endif %} + - cuda-python requirements: build: @@ -56,10 +57,10 @@ requirements: - cuda-version ={{ cuda_version }} {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.7.1,<12.0a0,<=11.8.3 {% else %} - cuda-cudart-dev - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.0,<13.0a0,<=12.6.0 {% endif %} - cython >=3.0.0 - rapids-build-backend >=0.3.0,<0.4.0.dev0 @@ -69,12 +70,15 @@ requirements: run: {% if cuda_major == "11" %} - cudatoolkit + - cuda-python >=11.7.1,<12.0a0,<=11.8.3 {% else %} - cuda-cudart + - cuda-python >=12.0,<13.0a0,<=12.6.0 {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - numba >=0.57 - numpy >=1.23,<3.0a0 + - python test: imports: diff --git a/dependencies.yaml b/dependencies.yaml index 483c21e61..3e2c2eb29 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -153,25 +153,25 @@ dependencies: - matrix: cuda: "12.*" packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0 + - &cuda_python12 cuda-python>=12.0,<13.0a0,<=12.6.0 - matrix: # All CUDA 11 versions packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0 + - &cuda_python11 cuda-python>=11.7.1,<12.0a0,<=11.8.3 - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==24.10.*,>=0.0.0a0 + - librmm-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - librmm-cu11==24.10.*,>=0.0.0a0 + - librmm-cu11==24.12.*,>=0.0.0a0 - matrix: null packages: - - librmm==24.10.*,>=0.0.0a0 + - librmm==24.12.*,>=0.0.0a0 checks: common: - output_types: [conda, requirements] @@ -232,13 +232,11 @@ dependencies: packages: develop: common: - - output_types: [conda, requirements] - packages: - - gcovr>=5.0 - output_types: conda packages: - clang==16.0.6 - clang-tools==16.0.6 + - gcovr>=5.0 docs: common: - output_types: conda diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp deleted file mode 100644 index 214228752..000000000 --- 
a/include/rmm/detail/dynamic_load_runtime.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include
-#include
-
-#include
-
-#include
-
-#include
-#include
-
-namespace RMM_NAMESPACE {
-namespace detail {
-
-/**
- * @brief `dynamic_load_runtime` loads the cuda runtime library at runtime
- *
- * By loading the cudart library at runtime we can use functions that
- * are added in newer minor versions of the cuda runtime.
- */
-struct dynamic_load_runtime {
-  static void* get_cuda_runtime_handle()
-  {
-    auto close_cudart = [](void* handle) { ::dlclose(handle); };
-    auto open_cudart  = []() {
-      ::dlerror();
-      const int major = CUDART_VERSION / 1000;
-
-      // In CUDA 12 the SONAME is correctly defined as libcudart.12, but for
-      // CUDA<=11 it includes an extra 0 minor version e.g. libcudart.11.0. We
-      // also allow finding the linker name.
-      const std::string libname_ver_cuda_11 = "libcudart.so." + std::to_string(major) + ".0";
-      const std::string libname_ver_cuda_12 = "libcudart.so." + std::to_string(major);
-      const std::string libname             = "libcudart.so";
-
-      void* ptr = nullptr;
-      for (auto&& name : {libname_ver_cuda_12, libname_ver_cuda_11, libname}) {
-        ptr = dlopen(name.c_str(), RTLD_LAZY);
-        if (ptr != nullptr) break;
-      }
-
-      if (ptr != nullptr) { return ptr; }
-
-      RMM_FAIL("Unable to dlopen cudart");
-    };
-    static std::unique_ptr<void, decltype(close_cudart)> cudart_handle{open_cudart(), close_cudart};
-    return cudart_handle.get();
-  }
-
-  template <typename... Args>
-  using function_sig = std::add_pointer_t<cudaError_t(Args...)>;
-
-  template <typename signature>
-  static std::optional<signature> function(const char* func_name)
-  {
-    auto* runtime = get_cuda_runtime_handle();
-    auto* handle  = ::dlsym(runtime, func_name);
-    if (!handle) { return std::nullopt; }
-    auto* function_ptr = reinterpret_cast<signature>(handle);
-    return std::optional<signature>(function_ptr);
-  }
-};
-
-#if defined(RMM_STATIC_CUDART)
-// clang-format off
-#define RMM_CUDART_API_WRAPPER(name, signature)                         \
-  template <typename... Args>                                           \
-  static cudaError_t name(Args... args)                                 \
-  {                                                                     \
-    _Pragma("GCC diagnostic push")                                      \
-    _Pragma("GCC diagnostic ignored \"-Waddress\"")                     \
-    static_assert(static_cast<signature>(::name),                       \
-                  "Failed to find #name function with arguments #signature"); \
-    _Pragma("GCC diagnostic pop")                                       \
-    return ::name(args...);                                             \
-  }
-// clang-format on
-#else
-#define RMM_CUDART_API_WRAPPER(name, signature)                             \
-  template <typename... Args>                                               \
-  static cudaError_t name(Args... args)                                     \
-  {                                                                         \
-    static const auto func = dynamic_load_runtime::function<signature>(#name); \
-    if (func) { return (*func)(args...); }                                  \
-    RMM_FAIL("Failed to find #name function in libcudart.so");              \
-  }
-#endif
-
-#if CUDART_VERSION >= 11020  // 11.2 introduced cudaMallocAsync
-/**
- * @brief Bind to the stream-ordered memory allocator functions
- * at runtime.
- *
- * This allows RMM users to compile/link against CUDA 11.2+ and run with
- * < CUDA 11.2 runtime as these functions are found at call time.
- */
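// For reference, the header being deleted here resolved cudaMallocAsync-family
// entry points with dlopen/dlsym so a binary built against CUDA 11.2+ headers
// could still start on an older libcudart. A generic sketch of that technique
// (illustrative only; the helper name is hypothetical):
#include <cuda_runtime_api.h>
#include <dlfcn.h>
#include <optional>

using cudaFreeAsync_fn = cudaError_t (*)(void*, cudaStream_t);

inline std::optional<cudaFreeAsync_fn> try_load_cuda_free_async()
{
  void* cudart = ::dlopen("libcudart.so", RTLD_LAZY);  // find the runtime library
  if (cudart == nullptr) { return std::nullopt; }
  void* sym = ::dlsym(cudart, "cudaFreeAsync");        // look the symbol up by name
  if (sym == nullptr) { return std::nullopt; }         // older runtime: symbol absent
  return reinterpret_cast<cudaFreeAsync_fn>(sym);
}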
-struct async_alloc {
-  static bool is_supported()
-  {
-#if defined(RMM_STATIC_CUDART)
-    static bool runtime_supports_pool = (CUDART_VERSION >= 11020);
-#else
-    static bool runtime_supports_pool =
-      dynamic_load_runtime::function<dynamic_load_runtime::function_sig<void*, cudaStream_t>>(
-        "cudaFreeAsync")
-        .has_value();
-#endif
-
-    static auto driver_supports_pool{[] {
-      int cuda_pool_supported{};
-      auto result = cudaDeviceGetAttribute(&cuda_pool_supported,
-                                           cudaDevAttrMemoryPoolsSupported,
-                                           rmm::get_current_cuda_device().value());
-      return result == cudaSuccess and cuda_pool_supported == 1;
-    }()};
-    return runtime_supports_pool and driver_supports_pool;
-  }
-
-  /**
-   * @brief Check whether the specified `cudaMemAllocationHandleType` is supported on the present
-   * CUDA driver/runtime version.
-   *
-   * @note This query was introduced in CUDA 11.3 so on CUDA 11.2 this function will only return
-   * true for `cudaMemHandleTypeNone`.
-   *
-   * @param handle_type An IPC export handle type to check for support.
-   * @return true if supported
-   * @return false if unsupported
-   */
-  static bool is_export_handle_type_supported(cudaMemAllocationHandleType handle_type)
-  {
-    int supported_handle_types_bitmask{};
-#if CUDART_VERSION >= 11030  // 11.3 introduced cudaDevAttrMemoryPoolSupportedHandleTypes
-    if (cudaMemHandleTypeNone != handle_type) {
-      auto const result = cudaDeviceGetAttribute(&supported_handle_types_bitmask,
-                                                 cudaDevAttrMemoryPoolSupportedHandleTypes,
-                                                 rmm::get_current_cuda_device().value());
-
-      // Don't throw on cudaErrorInvalidValue
-      auto const unsupported_runtime = (result == cudaErrorInvalidValue);
-      if (unsupported_runtime) return false;
-      // throw any other error that may have occurred
-      RMM_CUDA_TRY(result);
-    }
-
-#endif
-    return (supported_handle_types_bitmask & handle_type) == handle_type;
-  }
-
-  template <typename... Args>
-  using cudart_sig = dynamic_load_runtime::function_sig<Args...>;
-
-  using cudaMemPoolCreate_sig = cudart_sig<cudaMemPool_t*, const cudaMemPoolProps*>;
-  RMM_CUDART_API_WRAPPER(cudaMemPoolCreate, cudaMemPoolCreate_sig);
-
-  using cudaMemPoolSetAttribute_sig = cudart_sig<cudaMemPool_t, cudaMemPoolAttr, void*>;
-  RMM_CUDART_API_WRAPPER(cudaMemPoolSetAttribute, cudaMemPoolSetAttribute_sig);
-
-  using cudaMemPoolDestroy_sig = cudart_sig<cudaMemPool_t>;
-  RMM_CUDART_API_WRAPPER(cudaMemPoolDestroy, cudaMemPoolDestroy_sig);
-
-  using cudaMallocFromPoolAsync_sig = cudart_sig<void**, size_t, cudaMemPool_t, cudaStream_t>;
-  RMM_CUDART_API_WRAPPER(cudaMallocFromPoolAsync, cudaMallocFromPoolAsync_sig);
-
-  using cudaFreeAsync_sig = cudart_sig<void*, cudaStream_t>;
-  RMM_CUDART_API_WRAPPER(cudaFreeAsync, cudaFreeAsync_sig);
-
-  using cudaDeviceGetDefaultMemPool_sig = cudart_sig<cudaMemPool_t*, int>;
-  RMM_CUDART_API_WRAPPER(cudaDeviceGetDefaultMemPool, cudaDeviceGetDefaultMemPool_sig);
-};
-#endif
-
-#undef RMM_CUDART_API_WRAPPER
-}  // namespace detail
-}  // namespace RMM_NAMESPACE
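// Usage sketch for the formatted_log()/format_bytes() helpers introduced in the
// new header just below (illustrative calls, not part of the diff):
#include <rmm/detail/format.hpp>
#include <cstddef>
#include <iostream>
#include <string>

int main()
{
  std::string mr_name{"pool_memory_resource"};
  // std::string arguments are converted to char const* before std::snprintf runs:
  std::cout << rmm::detail::formatted_log("allocate(%zu) from %s", std::size_t{256}, mr_name)
            << '\n';
  // Prints "2.000000 GiB": the helper divides by 1024 until the value fits a unit.
  std::cout << rmm::detail::format_bytes(std::size_t{2} << 30) << '\n';
}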
diff --git a/include/rmm/detail/format.hpp b/include/rmm/detail/format.hpp
new file mode 100644
index 000000000..21acac032
--- /dev/null
+++ b/include/rmm/detail/format.hpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace RMM_NAMESPACE {
+namespace detail {
+
+/**
+ * @brief Format a message string with printf-style formatting
+ *
+ * This function performs printf-style formatting to avoid the need for fmt
+ * or spdlog's own templated APIs (which would require exposing spdlog
+ * symbols publicly) and returns the formatted message as a `std::string`.
+ *
+ * @param format The format string
+ * @param args The format arguments
+ * @return The formatted message
+ * @throw rmm::logic_error if an error occurs during formatting
+ */
+template <typename... Args>
+std::string formatted_log(std::string const& format, Args&&... args)
+{
+  auto convert_to_c_string = [](auto&& arg) -> decltype(auto) {
+    using ArgType = std::decay_t<decltype(arg)>;
+    if constexpr (std::is_same_v<ArgType, std::string>) {
+      return arg.c_str();
+    } else {
+      return std::forward<decltype(arg)>(arg);
+    }
+  };
+
+  // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg)
+  auto retsize =
+    std::snprintf(nullptr, 0, format.c_str(), convert_to_c_string(std::forward<Args>(args))...);
+  RMM_EXPECTS(retsize >= 0, "Error during formatting.");
+  if (retsize == 0) { return {}; }
+  auto size = static_cast<std::size_t>(retsize) + 1;  // for null terminator
+  // NOLINTNEXTLINE(modernize-avoid-c-arrays, cppcoreguidelines-avoid-c-arrays)
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), convert_to_c_string(std::forward<Args>(args))...);
+  // NOLINTEND(cppcoreguidelines-pro-type-vararg)
+  return {buf.get(), buf.get() + size - 1};  // drop '\0'
+}
+
+// specialization for no arguments
+template <>
+inline std::string formatted_log(std::string const& format)
+{
+  return format;
+}
+
+// Stringify a size in bytes to a human-readable value
+inline std::string format_bytes(std::size_t value)
+{
+  static std::array units{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
+
+  int index = 0;
+  auto size = static_cast<double>(value);
+  while (size > 1024) {
+    size /= 1024;
+    index++;
+  }
+
+  return std::to_string(size) + ' ' + units.at(index);
+}
+
+// Stringify a stream ID
+inline std::string format_stream(rmm::cuda_stream_view stream)
+{
+  std::stringstream sstr{};
+  sstr << std::hex << stream.value();
+  return sstr.str();
+}
+
+}  // namespace detail
+}  // namespace RMM_NAMESPACE
diff --git a/include/rmm/detail/logging_assert.hpp b/include/rmm/detail/logging_assert.hpp
index 7eb667211..4d702ee2b 100644
--- a/include/rmm/detail/logging_assert.hpp
+++ b/include/rmm/detail/logging_assert.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@
     if (!success) {                                                                              \
       RMM_LOG_CRITICAL(                                                                          \
         "[" __FILE__ ":" RMM_STRINGIFY(__LINE__) "] Assertion " RMM_STRINGIFY(_expr) " failed."); \
-      rmm::logger().flush();                                                                     \
+      rmm::detail::logger().flush();                                                             \
       /* NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) */                  \
       assert(success);                                                                           \
     }                                                                                            \
diff --git a/include/rmm/detail/runtime_async_alloc.hpp b/include/rmm/detail/runtime_async_alloc.hpp
new file mode 100644
index 000000000..6ddb2228b
--- /dev/null
+++ b/include/rmm/detail/runtime_async_alloc.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include + +#include +#include + +namespace RMM_NAMESPACE { +namespace detail { + +/** + * @brief Determine at runtime if the CUDA driver supports the stream-ordered + * memory allocator functions. + * + * This allows RMM users to compile/link against CUDA 11.2+ and run with + * older drivers. + */ + +struct runtime_async_alloc { + static bool is_supported() + { + static auto driver_supports_pool{[] { + int cuda_pool_supported{}; + auto result = cudaDeviceGetAttribute(&cuda_pool_supported, + cudaDevAttrMemoryPoolsSupported, + rmm::get_current_cuda_device().value()); + return result == cudaSuccess and cuda_pool_supported == 1; + }()}; + return driver_supports_pool; + } + + /** + * @brief Check whether the specified `cudaMemAllocationHandleType` is supported on the present + * CUDA driver/runtime version. + * + * @param handle_type An IPC export handle type to check for support. + * @return true if supported + * @return false if unsupported + */ + static bool is_export_handle_type_supported(cudaMemAllocationHandleType handle_type) + { + int supported_handle_types_bitmask{}; + if (cudaMemHandleTypeNone != handle_type) { + auto const result = cudaDeviceGetAttribute(&supported_handle_types_bitmask, + cudaDevAttrMemoryPoolSupportedHandleTypes, + rmm::get_current_cuda_device().value()); + + // Don't throw on cudaErrorInvalidValue + auto const unsupported_runtime = (result == cudaErrorInvalidValue); + if (unsupported_runtime) return false; + // throw any other error that may have occurred + RMM_CUDA_TRY(result); + } + return (supported_handle_types_bitmask & handle_type) == handle_type; + } +}; + +} // namespace detail +} // namespace RMM_NAMESPACE diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp index 326385f16..2cfd921b1 100644 --- a/include/rmm/logger.hpp +++ b/include/rmm/logger.hpp @@ -16,15 +16,13 @@ #pragma once +#include #include +#include -#include -#include #include #include -#include -#include #include namespace RMM_NAMESPACE { @@ -70,32 +68,11 @@ struct logger_wrapper { } }; -/** - * @brief Represent a size in number of bytes. - */ -struct bytes { - std::size_t value; ///< The size in bytes - - /** - * @brief Construct a new bytes object - * - * @param os The output stream - * @param value The size in bytes - */ - friend std::ostream& operator<<(std::ostream& os, bytes const& value) - { - static std::array units{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; - - int index = 0; - auto size = static_cast(value.value); - while (size > 1024) { - size /= 1024; - index++; - } - return os << size << ' ' << units.at(index); - } -}; - +inline spdlog::logger& logger() +{ + static detail::logger_wrapper wrapped{}; + return wrapped.logger_; +} } // namespace detail /** @@ -107,10 +84,12 @@ struct bytes { * * @return spdlog::logger& The logger. 
*/
-RMM_EXPORT inline spdlog::logger& logger()
+[[deprecated(
+  "Support for direct access to spdlog loggers in rmm is planned for "
+  "removal")]] RMM_EXPORT inline spdlog::logger&
+logger()
 {
-  static detail::logger_wrapper wrapped{};
-  return wrapped.logger_;
+  return detail::logger();
 }
 //! @cond Doxygen_Suppress
 // The default is INFO, but it should be used sparingly, so that by default a log file is only
 // output if there is important information, warnings, errors, and critical failures
 // Log messages that require computation should only be used at level TRACE and DEBUG
-#define RMM_LOG_TRACE(...) SPDLOG_LOGGER_TRACE(&rmm::logger(), __VA_ARGS__)
-#define RMM_LOG_DEBUG(...) SPDLOG_LOGGER_DEBUG(&rmm::logger(), __VA_ARGS__)
-#define RMM_LOG_INFO(...) SPDLOG_LOGGER_INFO(&rmm::logger(), __VA_ARGS__)
-#define RMM_LOG_WARN(...) SPDLOG_LOGGER_WARN(&rmm::logger(), __VA_ARGS__)
-#define RMM_LOG_ERROR(...) SPDLOG_LOGGER_ERROR(&rmm::logger(), __VA_ARGS__)
-#define RMM_LOG_CRITICAL(...) SPDLOG_LOGGER_CRITICAL(&rmm::logger(), __VA_ARGS__)
+#define RMM_LOG_TRACE(...) \
+  SPDLOG_LOGGER_TRACE(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_DEBUG(...) \
+  SPDLOG_LOGGER_DEBUG(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_INFO(...) \
+  SPDLOG_LOGGER_INFO(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_WARN(...) \
+  SPDLOG_LOGGER_WARN(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_ERROR(...) \
+  SPDLOG_LOGGER_ERROR(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
+#define RMM_LOG_CRITICAL(...) \
+  SPDLOG_LOGGER_CRITICAL(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__))
 //! @endcond
 } // namespace RMM_NAMESPACE
-// Doxygen doesn't like this because we're overloading something from fmt
-//! @cond Doxygen_Suppress
-template <>
-struct fmt::formatter<rmm::detail::bytes> : fmt::ostream_formatter {};
-
 //! @endcond
diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp
index 417b7d2b4..d3a4bb09d 100644
--- a/include/rmm/mr/device/arena_memory_resource.hpp
+++ b/include/rmm/mr/device/arena_memory_resource.hpp
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <rmm/detail/format.hpp>
 #include
 #include
 #include
@@ -97,7 +98,10 @@ class arena_memory_resource final : public device_memory_resource {
     : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure}
   {
     if (dump_log_on_failure_) {
-      logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log");
+      logger_ =
+        std::make_shared<spdlog::logger>("arena_memory_dump",
+                                         std::make_shared<spdlog::sinks::basic_file_sink_mt>(
+                                           "rmm_arena_memory_dump.log", true /*truncate file*/));
       // Set the level to `debug` for more detailed output.
       logger_->set_level(spdlog::level::info);
     }
@@ -120,7 +124,10 @@ class arena_memory_resource final : public device_memory_resource {
       dump_log_on_failure_{dump_log_on_failure}
   {
     if (dump_log_on_failure_) {
-      logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log");
+      logger_ =
+        std::make_shared<spdlog::logger>("arena_memory_dump",
+                                         std::make_shared<spdlog::sinks::basic_file_sink_mt>(
+                                           "rmm_arena_memory_dump.log", true /*truncate file*/));
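// The constructor hunks above replace spdlog::basic_logger_mt(), which registers
// the logger in spdlog's global registry (and throws on a duplicate name), with a
// hand-built, unregistered logger whose file sink truncates on open. The same
// pattern in isolation (assumes spdlog; the function name is hypothetical):
#include <spdlog/logger.h>
#include <spdlog/sinks/basic_file_sink.h>
#include <memory>

std::shared_ptr<spdlog::logger> make_dump_logger()
{
  auto sink = std::make_shared<spdlog::sinks::basic_file_sink_mt>(
    "rmm_arena_memory_dump.log", /*truncate=*/true);
  // Never passed to spdlog::register_logger(), so constructing a second arena
  // resource cannot collide on the logger name.
  return std::make_shared<spdlog::logger>("arena_memory_dump", std::move(sink));
}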
      // Set the level to `debug` for more detailed output.
      logger_->set_level(spdlog::level::info);
    }
@@ -329,7 +336,8 @@ class arena_memory_resource final : public device_memory_resource {
   void dump_memory_log(size_t bytes)
   {
     logger_->info("**************************************************");
-    logger_->info("Ran out of memory trying to allocate {}.", rmm::detail::bytes{bytes});
+    logger_->info(rmm::detail::formatted_log("Ran out of memory trying to allocate %s.",
+                                             rmm::detail::format_bytes(bytes)));
     logger_->info("**************************************************");
     logger_->info("Global arena:");
     global_arena_.dump_memory_log(logger_);
diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp
index 52fd2fe4e..b1fc0b112 100644
--- a/include/rmm/mr/device/cuda_async_memory_resource.hpp
+++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp
@@ -17,9 +17,9 @@
 #include
 #include
-#include <rmm/detail/dynamic_load_runtime.hpp>
 #include
 #include
+#include <rmm/detail/runtime_async_alloc.hpp>
 #include
 #include
 #include
@@ -31,12 +31,6 @@
 #include
 #include
-#if CUDART_VERSION >= 11020  // 11.2 introduced cudaMallocAsync
-#ifndef RMM_DISABLE_CUDA_MALLOC_ASYNC
-#define RMM_CUDA_MALLOC_ASYNC_SUPPORT
-#endif
-#endif
-
 namespace RMM_NAMESPACE {
 namespace mr {
 /**
@@ -91,9 +85,8 @@ class cuda_async_memory_resource final : public device_memory_resource {
     std::optional<std::size_t> release_threshold = {},
     std::optional<allocation_handle_type> export_handle_type = {})
   {
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
     // Check if cudaMallocAsync Memory pool supported
-    RMM_EXPECTS(rmm::detail::async_alloc::is_supported(),
+    RMM_EXPECTS(rmm::detail::runtime_async_alloc::is_supported(),
                 "cudaMallocAsync not supported with this CUDA driver/runtime version");
     // Construct explicit pool
     cudaMemPoolProps pool_props{};
     pool_props.allocType = cudaMemAllocationTypePinned;
     pool_props.handleTypes = static_cast<cudaMemAllocationHandleType>(
       export_handle_type.value_or(allocation_handle_type::none));
-    RMM_EXPECTS(rmm::detail::async_alloc::is_export_handle_type_supported(pool_props.handleTypes),
-                "Requested IPC memory handle type not supported");
+    RMM_EXPECTS(
+      rmm::detail::runtime_async_alloc::is_export_handle_type_supported(pool_props.handleTypes),
+      "Requested IPC memory handle type not supported");
     pool_props.location.type = cudaMemLocationTypeDevice;
     pool_props.location.id = rmm::get_current_cuda_device().value();
     cudaMemPool_t cuda_pool_handle{};
-    RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
+    RMM_CUDA_TRY(cudaMemPoolCreate(&cuda_pool_handle, &pool_props));
     pool_ = cuda_async_view_memory_resource{cuda_pool_handle};
     // CUDA drivers before 11.5 have known incompatibilities with the async allocator.
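// With the RMM_CUDA_MALLOC_ASYNC_SUPPORT guards gone, cuda_async_memory_resource
// is always compiled in and support is probed at runtime instead. A usage sketch
// (illustrative; requires a CUDA 11.2+ driver at runtime):
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>

void async_mr_roundtrip()
{
  // Throws rmm::logic_error on construction if the driver lacks cudaMallocAsync
  // support, rather than failing at compile time against an older toolkit.
  rmm::mr::cuda_async_memory_resource mr{};
  void* ptr = mr.allocate(1024, rmm::cuda_stream_default);
  mr.deallocate(ptr, 1024, rmm::cuda_stream_default);
}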
@@ -117,41 +111,34 @@ class cuda_async_memory_resource final : public device_memory_resource { constexpr auto min_async_version{11050}; if (driver_version < min_async_version) { int disabled{0}; - RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute( - pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled)); + RMM_CUDA_TRY( + cudaMemPoolSetAttribute(pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled)); } auto const [free, total] = rmm::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute uint64_t threshold = release_threshold.value_or(total); - RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute( - pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold)); + RMM_CUDA_TRY( + cudaMemPoolSetAttribute(pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold)); // Allocate and immediately deallocate the initial_pool_size to prime the pool with the // specified size auto const pool_size = initial_pool_size.value_or(free / 2); auto* ptr = do_allocate(pool_size, cuda_stream_default); do_deallocate(ptr, pool_size, cuda_stream_default); -#else - RMM_FAIL( - "cudaMallocAsync not supported by the version of the CUDA Toolkit used for this build"); -#endif } -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT /** * @brief Returns the underlying native handle to the CUDA pool * + * @return cudaMemPool_t Handle to the underlying CUDA pool */ [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); } -#endif ~cuda_async_memory_resource() override { -#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) - RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaMemPoolDestroy(pool_handle())); -#endif + RMM_ASSERT_CUDA_SUCCESS(cudaMemPoolDestroy(pool_handle())); } cuda_async_memory_resource(cuda_async_memory_resource const&) = delete; cuda_async_memory_resource(cuda_async_memory_resource&&) = delete; @@ -159,9 +146,7 @@ class cuda_async_memory_resource final : public device_memory_resource { cuda_async_memory_resource& operator=(cuda_async_memory_resource&&) = delete; private: -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT cuda_async_view_memory_resource pool_{}; -#endif /** * @brief Allocates memory of size at least \p bytes. 
@@ -175,12 +160,7 @@ class cuda_async_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { void* ptr{nullptr}; -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT ptr = pool_.allocate(bytes, stream); -#else - (void)bytes; - (void)stream; -#endif return ptr; } @@ -194,13 +174,7 @@ class cuda_async_memory_resource final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override { -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT pool_.deallocate(ptr, bytes, stream); -#else - (void)ptr; - (void)bytes; - (void)stream; -#endif } /** @@ -213,11 +187,7 @@ class cuda_async_memory_resource final : public device_memory_resource { [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { auto const* async_mr = dynamic_cast<cuda_async_memory_resource const*>(&other); -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT return (async_mr != nullptr) && (this->pool_handle() == async_mr->pool_handle()); -#else - return async_mr != nullptr; -#endif } }; diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 3e1900e72..180c412ee 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -28,10 +27,6 @@ #include #include -#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync -#define RMM_CUDA_MALLOC_ASYNC_SUPPORT -#endif - namespace RMM_NAMESPACE { namespace mr { /** @@ -46,13 +41,12 @@ namespace mr { */ class cuda_async_view_memory_resource final : public device_memory_resource { public: -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT /** * @brief Constructs a cuda_async_view_memory_resource which uses an existing CUDA memory pool. * The provided pool is not owned by cuda_async_view_memory_resource and must remain valid * during the lifetime of the memory resource. * - * @throws rmm::runtime_error if the CUDA version does not support `cudaMallocAsync` + * @throws rmm::logic_error if the CUDA version does not support `cudaMallocAsync` * * @param valid_pool_handle Handle to a CUDA memory pool which will be used to * serve allocation requests. @@ -71,15 +65,13 @@ class cuda_async_view_memory_resource final : public device_memory_resource { RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported, "cudaMallocAsync not supported with this CUDA driver/runtime version"); } -#endif -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT /** * @brief Returns the underlying native handle to the CUDA pool * + * @return cudaMemPool_t Handle to the underlying CUDA pool */ [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; } -#endif cuda_async_view_memory_resource() = default; cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) = @@ -92,9 +84,7 @@ class cuda_async_view_memory_resource final : public device_memory_resource { default; ///< @default_move_assignment{cuda_async_view_memory_resource} private: -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT cudaMemPool_t cuda_pool_handle_{}; -#endif /** * @brief Allocates memory of size at least \p bytes. 
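With the `RMM_CUDA_MALLOC_ASYNC_SUPPORT` compile-time gate removed, `cudaMallocAsync` support is now verified only at runtime, when the resource is constructed. A minimal sketch of the caller-visible behavior through RMM's public Python wrapper (the 256 MiB pool size is an arbitrary example value):

```python
import rmm

try:
    # Construction performs the driver/runtime support check that the C++
    # constructor above now does unconditionally at runtime.
    mr = rmm.mr.CudaAsyncMemoryResource(initial_pool_size=256 * 1024 * 1024)
    rmm.mr.set_current_device_resource(mr)
except RuntimeError as err:
    # e.g. "cudaMallocAsync not supported with this CUDA driver/runtime version"
    print(f"Async pool unavailable: {err}")

buf = rmm.DeviceBuffer(size=1024)  # served by the async pool if it was installed
```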
@@ -108,15 +98,9 @@ class cuda_async_view_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { void* ptr{nullptr}; -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT if (bytes > 0) { - RMM_CUDA_TRY_ALLOC(rmm::detail::async_alloc::cudaMallocFromPoolAsync( - &ptr, bytes, pool_handle(), stream.value())); + RMM_CUDA_TRY_ALLOC(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value())); } -#else - (void)bytes; - (void)stream; -#endif return ptr; } @@ -132,15 +116,7 @@ class cuda_async_view_memory_resource final : public device_memory_resource { [[maybe_unused]] std::size_t bytes, rmm::cuda_stream_view stream) override { -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - if (ptr != nullptr) { - RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaFreeAsync(ptr, stream.value())); - } -#else - (void)ptr; - (void)bytes; - (void)stream; -#endif + if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); } } /** diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 6f8303c83..419c4fcf4 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -21,13 +21,13 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -647,37 +647,42 @@ class global_arena final { * * @param logger the spdlog logger to use */ - void dump_memory_log(std::shared_ptr<spdlog::logger> const& logger) const + RMM_HIDDEN void dump_memory_log(std::shared_ptr<spdlog::logger> const& logger) const { std::lock_guard<std::mutex> lock(mtx_); - logger->info(" Arena size: {}", rmm::detail::bytes{upstream_block_.size()}); - logger->info(" # superblocks: {}", superblocks_.size()); + logger->info(rmm::detail::formatted_log(" Arena size: %s", + rmm::detail::format_bytes(upstream_block_.size()))); + logger->info(rmm::detail::formatted_log(" # superblocks: %zu", superblocks_.size())); if (!superblocks_.empty()) { - logger->debug(" Total size of superblocks: {}", - rmm::detail::bytes{total_memory_size(superblocks_)}); + logger->debug( + rmm::detail::formatted_log(" Total size of superblocks: %s", + rmm::detail::format_bytes(total_memory_size(superblocks_)))); auto const total_free = total_free_size(superblocks_); auto const max_free = max_free_size(superblocks_); auto const fragmentation = (1 - max_free / static_cast<double>(total_free)) * 100; - logger->info(" Total free memory: {}", rmm::detail::bytes{total_free}); - logger->info(" Largest block of free memory: {}", rmm::detail::bytes{max_free}); - logger->info(" Fragmentation: {:.2f}%", fragmentation); + logger->info(rmm::detail::formatted_log(" Total free memory: %s", + rmm::detail::format_bytes(total_free))); + logger->info(rmm::detail::formatted_log(" Largest block of free memory: %s", + rmm::detail::format_bytes(max_free))); + logger->info(rmm::detail::formatted_log(" Fragmentation: %0.2f", fragmentation)); - auto index = 0; + auto index = decltype(superblocks_.size()){0}; char* prev_end{}; for (auto const& sblk : superblocks_) { if (prev_end == nullptr) { prev_end = sblk.pointer(); } - logger->debug( - " Superblock {}: start={}, end={}, size={}, empty={}, # free blocks={}, max free={}, " - "gap={}", + logger->debug(rmm::detail::formatted_log( + " Superblock %zu: start=%p, end=%p, size=%s, empty=%s, # free blocks=%zu, max " + "free=%s, " + "gap=%s", index, - fmt::ptr(sblk.pointer()), - fmt::ptr(sblk.end()), - rmm::detail::bytes{sblk.size()}, - sblk.empty(), + sblk.pointer(), + sblk.end(), + 
rmm::detail::format_bytes(sblk.size()), + sblk.empty() ? "T" : "F", sblk.free_blocks(), - rmm::detail::bytes{sblk.max_free_size()}, - rmm::detail::bytes{static_cast<std::size_t>(sblk.pointer() - prev_end)}); + rmm::detail::format_bytes(sblk.max_free_size()), + rmm::detail::format_bytes(static_cast<std::size_t>(sblk.pointer() - prev_end)))); prev_end = sblk.end(); index++; } diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp index 8d5cbf9ed..8b056e6d9 100644 --- a/include/rmm/mr/device/detail/coalescing_free_list.hpp +++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp @@ -20,8 +20,6 @@ #include #include -#include - #include #include #include @@ -131,10 +129,7 @@ struct block : public block_base { /** * @brief Print this block. For debugging. */ - inline void print() const - { - std::cout << fmt::format("{} {} B", fmt::ptr(pointer()), size()) << std::endl; - } + inline void print() const { std::cout << pointer() << " " << size() << " B" << std::endl; } #endif private: @@ -146,7 +141,7 @@ struct block : public block_base { /// Print block on an ostream inline std::ostream& operator<<(std::ostream& out, const block& blk) { - out << fmt::format("{} {} B\n", fmt::ptr(blk.pointer()), blk.size()); + out << blk.pointer() << " " << blk.size() << " B" << std::endl; return out; } #endif diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index 9cf674d6e..f177504f2 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -19,13 +19,12 @@ #include #include #include +#include #include #include #include -#include - #include #include #include @@ -201,7 +200,7 @@ class stream_ordered_memory_resource : public crtp, public device_ */ void* do_allocate(std::size_t size, cuda_stream_view stream) override { - RMM_LOG_TRACE("[A][stream {:p}][{}B]", fmt::ptr(stream.value()), size); + RMM_LOG_TRACE("[A][stream %s][%zuB]", rmm::detail::format_stream(stream), size); if (size <= 0) { return nullptr; } @@ -215,10 +214,10 @@ class stream_ordered_memory_resource : public crtp, public device_ rmm::out_of_memory); auto const block = this->underlying().get_block(size, stream_event); - RMM_LOG_TRACE("[A][stream {:p}][{}B][{:p}]", - fmt::ptr(stream_event.stream), + RMM_LOG_TRACE("[A][stream %s][%zuB][%p]", + rmm::detail::format_stream(stream_event.stream), size, - fmt::ptr(block.pointer())); + block.pointer()); log_summary_trace(); @@ -234,7 +233,7 @@ class stream_ordered_memory_resource : public crtp, public device_ */ void do_deallocate(void* ptr, std::size_t size, cuda_stream_view stream) override { - RMM_LOG_TRACE("[D][stream {:p}][{}B][{:p}]", fmt::ptr(stream.value()), size, ptr); + RMM_LOG_TRACE("[D][stream %s][%zuB][%p]", rmm::detail::format_stream(stream), size, ptr); if (size <= 0 || ptr == nullptr) { return; } @@ -384,10 +383,10 @@ class stream_ordered_memory_resource : public crtp, public device_ if (merge_first) { merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); - RMM_LOG_DEBUG("[A][Stream {:p}][{}B][Merged stream {:p}]", - fmt::ptr(stream_event.stream), + RMM_LOG_DEBUG("[A][Stream %s][%zuB][Merged stream %s]", + rmm::detail::format_stream(stream_event.stream), size, - fmt::ptr(iter->first.stream)); + rmm::detail::format_stream(iter->first.stream)); stream_free_blocks_.erase(iter); @@ -414,11 +413,11 @@ class stream_ordered_memory_resource : public crtp, 
public device_ block_type const block = find_block(iter); if (block.is_valid()) { - RMM_LOG_DEBUG((merge_first) ? "[A][Stream {:p}][{}B][Found after merging stream {:p}]" - : "[A][Stream {:p}][{}B][Taken from stream {:p}]", - fmt::ptr(stream_event.stream), + RMM_LOG_DEBUG((merge_first) ? "[A][Stream %s][%zuB][Found after merging stream %s]" + : "[A][Stream %s][%zuB][Taken from stream %s]", + rmm::detail::format_stream(stream_event.stream), size, - fmt::ptr(iter->first.stream)); + rmm::detail::format_stream(iter->first.stream)); return block; } } @@ -471,7 +470,7 @@ class stream_ordered_memory_resource : public crtp, public device_ max_block = std::max(summary.first, max_block); free_mem += summary.second; }); - RMM_LOG_TRACE("[Summary][Free lists: {}][Blocks: {}][Max Block: {}][Total Free: {}]", + RMM_LOG_TRACE("[Summary][Free lists: %zu][Blocks: %zu][Max Block: %zu][Total Free: %zu]", stream_free_blocks_.size(), num_blocks, max_block, diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 595ab2e4e..578543852 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -18,16 +18,17 @@ #include #include #include +#include #include #include -#include #include #include #include #include #include +#include #include #include #include @@ -297,10 +298,12 @@ class logging_resource_adaptor final : public device_memory_resource { { try { auto const ptr = get_upstream_resource().allocate_async(bytes, stream); - logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + logger_->info(rmm::detail::formatted_log( + "allocate,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream))); return ptr; } catch (...) { - logger_->info("allocate failure,{},{},{}", nullptr, bytes, fmt::ptr(stream.value())); + logger_->info(rmm::detail::formatted_log( + "allocate failure,%p,%zu,%s", nullptr, bytes, rmm::detail::format_stream(stream))); throw; } } @@ -321,7 +324,8 @@ class logging_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - logger_->info("free,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + logger_->info( + rmm::detail::formatted_log("free,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream))); get_upstream_resource().deallocate_async(ptr, bytes, stream); } diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index f63de21ff..037147de3 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -34,8 +35,6 @@ #include #include -#include - #include #include #include @@ -271,8 +270,8 @@ class pool_memory_resource final } try_size = std::max(min_size, try_size / 2); } - RMM_LOG_ERROR("[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]", - fmt::ptr(stream.value()), + RMM_LOG_ERROR("[A][Stream %s][Upstream %zuB][FAILURE maximum pool size exceeded]", + rmm::detail::format_stream(stream), min_size); RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); } @@ -351,7 +350,7 @@ class pool_memory_resource final */ std::optional block_from_upstream(std::size_t size, cuda_stream_view stream) { - RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size); + RMM_LOG_DEBUG("[A][Stream %s][Upstream %zuB]", rmm::detail::format_stream(stream), 
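The `logging_resource_adaptor` hunk above swaps fmt-style `{}` placeholders for printf-style ones, but the CSV event log it emits (action, pointer, bytes, stream) is unchanged. A usage sketch via the public Python wrapper; the log file name is an illustrative choice:

```python
import rmm

upstream = rmm.mr.CudaMemoryResource()
mr = rmm.mr.LoggingResourceAdaptor(upstream, log_file_name="rmm_log.csv")
rmm.mr.set_current_device_resource(mr)

buf = rmm.DeviceBuffer(size=4096)  # logged as "allocate,<ptr>,4096,<stream>"
del buf                            # logged as "free,<ptr>,4096,<stream>"
mr.flush()                         # ensure buffered log lines reach the file
```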
size); if (size == 0) { return {}; } diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 6a5916e5c..8131eef4d 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -23,8 +23,6 @@ #include #include -#include - #include #include #include @@ -188,7 +186,7 @@ class tracking_resource_adaptor final : public device_memory_resource { void log_outstanding_allocations() const { #if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG - RMM_LOG_DEBUG("Outstanding Allocations: {}", get_outstanding_allocations_str()); + RMM_LOG_DEBUG("Outstanding Allocations: %s", get_outstanding_allocations_str()); #endif // SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG } @@ -236,12 +234,12 @@ class tracking_resource_adaptor final : public device_memory_resource { // Ensure the allocation is found and the number of bytes match if (found == allocations_.end()) { - // Don't throw but log an error. Throwing in a descructor (or any noexcept) will call + // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call // std::terminate RMM_LOG_ERROR( - "Deallocating a pointer that was not tracked. Ptr: {:p} [{}B], Current Num. Allocations: " - "{}", - fmt::ptr(ptr), + "Deallocating a pointer that was not tracked. Ptr: %p [%zuB], Current Num. Allocations: " + "%zu", + ptr, bytes, this->allocations_.size()); } else { @@ -250,10 +248,10 @@ class tracking_resource_adaptor final : public device_memory_resource { auto allocated_bytes = found->second.allocation_size; if (allocated_bytes != bytes) { - // Don't throw but log an error. Throwing in a descructor (or any noexcept) will call + // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call // std::terminate RMM_LOG_ERROR( - "Alloc bytes ({}) and Dealloc bytes ({}) do not match", allocated_bytes, bytes); + "Alloc bytes (%zu) and Dealloc bytes (%zu) do not match", allocated_bytes, bytes); bytes = allocated_bytes; } diff --git a/python/librmm/pyproject.toml b/python/librmm/pyproject.toml index 0f0b4e397..bae2ef36b 100644 --- a/python/librmm/pyproject.toml +++ b/python/librmm/pyproject.toml @@ -67,3 +67,11 @@ wheel.py-api = "py3" provider = "scikit_build_core.metadata.regex" input = "librmm/VERSION" regex = "(?P.*)" + +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index 6c2515102..ac8495e14 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -30,4 +30,5 @@ rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") add_subdirectory(rmm/_cuda) -add_subdirectory(rmm/_lib) +add_subdirectory(rmm/librmm) +add_subdirectory(rmm/pylibrmm) diff --git a/python/rmm/docs/conf.py b/python/rmm/docs/conf.py index d48dc2b42..2aad3a82c 100644 --- a/python/rmm/docs/conf.py +++ b/python/rmm/docs/conf.py @@ -12,6 +12,7 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
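The `tracking_resource_adaptor` changes above only touch how mismatched deallocations are reported (they are logged rather than thrown, since the report can happen on a noexcept path). A short sketch of normal tracking from Python, using the documented adaptor API:

```python
import rmm

mr = rmm.mr.TrackingResourceAdaptor(rmm.mr.CudaMemoryResource())
rmm.mr.set_current_device_resource(mr)

buf = rmm.DeviceBuffer(size=1 << 20)
print(mr.get_allocated_bytes())  # 1048576 while buf is alive
del buf
print(mr.get_allocated_bytes())  # 0 once the buffer is released
```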
# +import datetime import os import re @@ -22,8 +23,8 @@ # -- Project information ----------------------------------------------------- project = "rmm" -copyright = "2020-2023, NVIDIA" -author = "NVIDIA" +copyright = f"2018-{datetime.datetime.today().year}, NVIDIA Corporation" +author = "NVIDIA Corporation" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -118,19 +119,6 @@ html_theme = "sphinx_rtd_theme" -# on_rtd is whether we are on readthedocs.org -on_rtd = os.environ.get("READTHEDOCS", None) == "True" - -if not on_rtd: - # only import and set the theme if we're building docs locally - # otherwise, readthedocs.org uses their theme by default, - # so no need to specify it - import sphinx_rtd_theme - - html_theme = "sphinx_rtd_theme" - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - - # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. @@ -209,7 +197,10 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3", None), "numba": ("https://numba.readthedocs.io/en/stable", None), - "cuda-python": ("https://nvidia.github.io/cuda-python/", None), + "cuda-python": ( + "https://nvidia.github.io/cuda-python/cuda-bindings/", + None, + ), } # Config numpydoc diff --git a/python/rmm/docs/guide.md b/python/rmm/docs/guide.md index 22c0dc023..c7e940497 100644 --- a/python/rmm/docs/guide.md +++ b/python/rmm/docs/guide.md @@ -236,17 +236,17 @@ Common to both usages is that they modify the currently active RMM memory resour >>> # We start with the default cuda memory resource >>> rmm.mr.get_current_device_resource() - + >>> # When using statistics, we get a StatisticsResourceAdaptor with the context >>> with rmm.statistics.statistics(): ... rmm.mr.get_current_device_resource() - + >>> # We can also enable statistics globally >>> rmm.statistics.enable_statistics() >>> print(rmm.mr.get_current_device_resource()) - + ``` With statistics enabled, you can query statistics of the current and peak bytes and number of allocations performed by the current RMM memory resource: diff --git a/python/rmm/pyproject.toml b/python/rmm/pyproject.toml index 7577ad961..aaaa15482 100644 --- a/python/rmm/pyproject.toml +++ b/python/rmm/pyproject.toml @@ -30,7 +30,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "cuda-python>=11.7.1,<12.0a0", + "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "numba>=0.57", "numpy>=1.23,<3.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -128,14 +128,23 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.26.4,!=3.30.0", - "cuda-python>=11.7.1,<12.0a0", + "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "cython>=3.0.0", - "librmm==24.10.*,>=0.0.0a0", + "librmm==24.12.*,>=0.0.0a0", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
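As a companion to the guide.md excerpt above, here is a minimal sketch of enabling statistics globally and reading them back; field names follow the documented `rmm.statistics` API:

```python
import rmm
import rmm.statistics

rmm.statistics.enable_statistics()  # wraps the current device resource
buf = rmm.DeviceBuffer(size=2048)

stats = rmm.statistics.get_statistics()
# 2048 bytes / 1 allocation, plus any other live allocations made since enabling
print(stats.current_bytes, stats.current_count, stats.peak_bytes)
```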
+[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] # treat warnings as errors filterwarnings = [ "error", + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning" ] diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py index 1e3b5c8b1..832fec095 100644 --- a/python/rmm/rmm/__init__.py +++ b/python/rmm/rmm/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + from rmm import mr -from rmm._lib.device_buffer import DeviceBuffer -from rmm._lib.logger import ( +from rmm._version import __git_commit__, __version__ +from rmm.mr import disable_logging, enable_logging, get_log_filenames +from rmm.pylibrmm.device_buffer import DeviceBuffer +from rmm.pylibrmm.logger import ( flush_logger, get_flush_level, get_logging_level, @@ -23,8 +27,6 @@ set_logging_level, should_log, ) -from rmm._version import __git_commit__, __version__ -from rmm.mr import disable_logging, enable_logging, get_log_filenames from rmm.rmm import ( RMMError, is_initialized, @@ -52,3 +54,19 @@ "should_log", "unregister_reinitialize_hook", ] + + +def __getattr__(name): + if name == "_lib": + import importlib + + warnings.warn( + "The `rmm._lib` module is deprecated and will be removed in a future release. Use `rmm.pylibrmm` instead.", + FutureWarning, + stacklevel=2, + ) + + module = importlib.import_module("rmm.pylibrmm") + return module + else: + raise AttributeError(f"Module '{__name__}' has no attribute '{name}'")
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,4 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .device_buffer import DeviceBuffer +import warnings + +from rmm.pylibrmm import * + +warnings.warn( + "The `rmm._lib` module is deprecated and will be removed in a future release. Use `rmm.pylibrmm` instead.", + FutureWarning, + stacklevel=2, +) diff --git a/python/rmm/rmm/_lib/cuda_stream.pxd b/python/rmm/rmm/_lib/cuda_stream.pxd index e224cf9af..afc365fbb 100644 --- a/python/rmm/rmm/_lib/cuda_stream.pxd +++ b/python/rmm/rmm/_lib/cuda_stream.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,26 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -cimport cython -from cuda.ccudart cimport cudaStream_t -from libcpp cimport bool -from libcpp.memory cimport unique_ptr - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream: - cuda_stream() except + - bool is_valid() except + - cudaStream_t value() except + - cuda_stream_view view() except + - void synchronize() except + - void synchronize_no_throw() - - -@cython.final -cdef class CudaStream: - cdef unique_ptr[cuda_stream] c_obj - cdef cudaStream_t value(self) except * nogil - cdef bool is_valid(self) except * nogil +from rmm.librmm.cuda_stream cimport cuda_stream +from rmm.pylibrmm.cuda_stream cimport CudaStream diff --git a/python/rmm/rmm/_lib/lib.pxd b/python/rmm/rmm/_lib/cuda_stream.py similarity index 70% rename from python/rmm/rmm/_lib/lib.pxd rename to python/rmm/rmm/_lib/cuda_stream.py index e35b672e4..1eb424e12 100644 --- a/python/rmm/rmm/_lib/lib.pxd +++ b/python/rmm/rmm/_lib/cuda_stream.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from libc.stdint cimport uintptr_t -from libcpp cimport bool -from libcpp.utility cimport pair -from libcpp.vector cimport vector - -ctypedef pair[const char*, unsigned int] caller_pair +from rmm.pylibrmm.cuda_stream import CudaStream # noqa: F401 diff --git a/python/rmm/rmm/_lib/cuda_stream_pool.pxd b/python/rmm/rmm/_lib/cuda_stream_pool.pxd index 0286a9377..4da59cc68 100644 --- a/python/rmm/rmm/_lib/cuda_stream_pool.pxd +++ b/python/rmm/rmm/_lib/cuda_stream_pool.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,14 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
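The `__getattr__` shim added above means old `rmm._lib` attribute access keeps working but warns. A sketch of what downstream code observes (the `catch_warnings` scaffolding is only there to make the warning visible in a script):

```python
import warnings

import rmm

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy = rmm._lib  # resolved by rmm.__getattr__, returns rmm.pylibrmm
assert any(issubclass(w.category, FutureWarning) for w in caught)

# New code should import from the new location directly:
from rmm.pylibrmm.device_buffer import DeviceBuffer
```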
-cimport cython - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream_pool: - cuda_stream_pool(size_t pool_size) - cuda_stream_view get_stream() - cuda_stream_view get_stream(size_t stream_id) except + - size_t get_pool_size() +from rmm.librmm.cuda_stream_pool cimport cuda_stream_pool diff --git a/python/rmm/rmm/_lib/cuda_stream_view.pxd b/python/rmm/rmm/_lib/cuda_stream_view.pxd index bf0d33c24..c336b0fe8 100644 --- a/python/rmm/rmm/_lib/cuda_stream_view.pxd +++ b/python/rmm/rmm/_lib/cuda_stream_view.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,21 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cuda.ccudart cimport cudaStream_t -from libcpp cimport bool - - -cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil: - cdef cppclass cuda_stream_view: - cuda_stream_view() - cuda_stream_view(cudaStream_t) - cudaStream_t value() - bool is_default() - bool is_per_thread_default() - void synchronize() except + - - cdef bool operator==(cuda_stream_view const, cuda_stream_view const) - - const cuda_stream_view cuda_stream_default - const cuda_stream_view cuda_stream_legacy - const cuda_stream_view cuda_stream_per_thread +from rmm.librmm.cuda_stream_view cimport ( + cuda_stream_default, + cuda_stream_legacy, + cuda_stream_per_thread, + cuda_stream_view, +) diff --git a/python/rmm/rmm/_lib/device_buffer.pxd b/python/rmm/rmm/_lib/device_buffer.pxd index 0da9ace0c..22833b1b8 100644 --- a/python/rmm/rmm/_lib/device_buffer.pxd +++ b/python/rmm/rmm/_lib/device_buffer.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,105 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from libc.stdint cimport uintptr_t -from libcpp.memory cimport unique_ptr - -from rmm._cuda.stream cimport Stream -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.memory_resource cimport ( - DeviceMemoryResource, - device_memory_resource, +from rmm.librmm.device_buffer cimport ( + cuda_device_id, + device_buffer, + get_current_cuda_device, + prefetch, +) +from rmm.pylibrmm.device_buffer cimport ( + DeviceBuffer, + copy_device_to_ptr, + copy_host_to_ptr, + copy_ptr_to_host, + to_device, ) - - -cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: - cdef cppclass cuda_device_id: - ctypedef int value_type - cuda_device_id() - cuda_device_id(value_type id) - value_type value() - - cdef cuda_device_id get_current_cuda_device() - -cdef extern from "rmm/prefetch.hpp" namespace "rmm" nogil: - cdef void prefetch(const void* ptr, - size_t bytes, - cuda_device_id device, - cuda_stream_view stream) except + - -cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: - cdef cppclass device_buffer: - device_buffer() - device_buffer( - size_t size, - cuda_stream_view stream, - device_memory_resource * - ) except + - device_buffer( - const void* source_data, - size_t size, - cuda_stream_view stream, - device_memory_resource * - ) except + - device_buffer( - const device_buffer buf, - cuda_stream_view stream, - device_memory_resource * - ) except + - void reserve(size_t new_capacity, cuda_stream_view stream) except + - void resize(size_t new_size, cuda_stream_view stream) except + - void shrink_to_fit(cuda_stream_view stream) except + - void* data() - size_t size() - size_t capacity() - - -cdef class DeviceBuffer: - cdef unique_ptr[device_buffer] c_obj - - # Holds a reference to the DeviceMemoryResource used for allocation. - # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is - # needed for deallocation - cdef DeviceMemoryResource mr - - # Holds a reference to the stream used by the underlying `device_buffer`. - # Ensures the stream does not get destroyed before this DeviceBuffer - cdef Stream stream - - @staticmethod - cdef DeviceBuffer c_from_unique_ptr( - unique_ptr[device_buffer] ptr, - Stream stream=*, - DeviceMemoryResource mr=*, - ) - - @staticmethod - cdef DeviceBuffer c_to_device(const unsigned char[::1] b, - Stream stream=*) except * - cpdef copy_to_host(self, ary=*, Stream stream=*) - cpdef copy_from_host(self, ary, Stream stream=*) - cpdef copy_from_device(self, cuda_ary, Stream stream=*) - cpdef bytes tobytes(self, Stream stream=*) - - cdef size_t c_size(self) except * - cpdef void reserve(self, size_t new_capacity, Stream stream=*) except * - cpdef void resize(self, size_t new_size, Stream stream=*) except * - cpdef size_t capacity(self) except * - cdef void* c_data(self) except * - - cdef device_buffer c_release(self) except * - -cpdef DeviceBuffer to_device(const unsigned char[::1] b, - Stream stream=*) -cpdef void copy_ptr_to_host(uintptr_t db, - unsigned char[::1] hb, - Stream stream=*) except * - -cpdef void copy_host_to_ptr(const unsigned char[::1] hb, - uintptr_t db, - Stream stream=*) except * - -cpdef void copy_device_to_ptr(uintptr_t d_src, - uintptr_t d_dst, - size_t count, - Stream stream=*) except * diff --git a/python/rmm/rmm/_lib/device_buffer.py b/python/rmm/rmm/_lib/device_buffer.py new file mode 100644 index 000000000..c531bca5f --- /dev/null +++ b/python/rmm/rmm/_lib/device_buffer.py @@ -0,0 +1,21 @@ +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.device_buffer import ( # noqa: F401 + DeviceBuffer, + copy_device_to_ptr, + copy_host_to_ptr, + copy_ptr_to_host, + to_device, +) diff --git a/python/rmm/rmm/_lib/device_uvector.pxd b/python/rmm/rmm/_lib/device_uvector.pxd index 29e122bbf..230b0afb3 100644 --- a/python/rmm/rmm/_lib/device_uvector.pxd +++ b/python/rmm/rmm/_lib/device_uvector.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,28 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.device_buffer cimport device_buffer -from rmm._lib.memory_resource cimport device_memory_resource - - -cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: - cdef cppclass device_uvector[T]: - device_uvector(size_t size, cuda_stream_view stream) except + - T* element_ptr(size_t index) - void set_element(size_t element_index, const T& v, cuda_stream_view s) - void set_element_async( - size_t element_index, - const T& v, - cuda_stream_view s - ) except + - T front_element(cuda_stream_view s) except + - T back_element(cuda_stream_view s) except + - void reserve(size_t new_capacity, cuda_stream_view stream) except + - void resize(size_t new_size, cuda_stream_view stream) except + - void shrink_to_fit(cuda_stream_view stream) except + - device_buffer release() - size_t capacity() - T* data() - size_t size() - device_memory_resource* memory_resource() +from rmm.librmm.device_uvector cimport device_uvector diff --git a/python/rmm/rmm/_lib/helper.pxd b/python/rmm/rmm/_lib/helper.pxd index 8ca151c00..4a5159435 100644 --- a/python/rmm/rmm/_lib/helper.pxd +++ b/python/rmm/rmm/_lib/helper.pxd @@ -12,5 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. - -cdef object parse_bytes(object s) except * +from rmm.pylibrmm.helper cimport parse_bytes diff --git a/python/rmm/rmm/_lib/logger.pxd b/python/rmm/rmm/_lib/logger.pxd new file mode 100644 index 000000000..bef05c903 --- /dev/null +++ b/python/rmm/rmm/_lib/logger.pxd @@ -0,0 +1,24 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from rmm.librmm._logger cimport logger, logging_level, spdlog_logger +from rmm.pylibrmm.logger cimport ( + _validate_level_type, + flush_logger, + get_flush_level, + get_logging_level, + set_flush_level, + set_logging_level, + should_log, +) diff --git a/python/rmm/rmm/_lib/logger.py b/python/rmm/rmm/_lib/logger.py new file mode 100644 index 000000000..1e9b519b8 --- /dev/null +++ b/python/rmm/rmm/_lib/logger.py @@ -0,0 +1,24 @@ +# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm._logger import logging_level # noqa: F401 +from rmm.pylibrmm.logger import ( # noqa: F401 + _validate_level_type, + flush_logger, + get_flush_level, + get_logging_level, + set_flush_level, + set_logging_level, + should_log, +) diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 000a3fe1e..0d11001a4 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,92 +12,51 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from libc.stdint cimport int8_t -from libcpp.memory cimport shared_ptr -from libcpp.pair cimport pair -from libcpp.string cimport string -from libcpp.vector cimport vector - -from rmm._lib.cuda_stream_view cimport cuda_stream_view - - -cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass device_memory_resource: - void* allocate(size_t bytes) except + - void* allocate(size_t bytes, cuda_stream_view stream) except + - void deallocate(void* ptr, size_t bytes) except + - void deallocate( - void* ptr, - size_t bytes, - cuda_stream_view stream - ) except + - -cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: - size_t percent_of_free_device_memory(int percent) except + - pair[size_t, size_t] available_device_memory() except + - -cdef class DeviceMemoryResource: - cdef shared_ptr[device_memory_resource] c_obj - cdef device_memory_resource* get_mr(self) noexcept nogil - -cdef class UpstreamResourceAdaptor(DeviceMemoryResource): - cdef readonly DeviceMemoryResource upstream_mr - - cpdef DeviceMemoryResource get_upstream(self) - -cdef class CudaMemoryResource(DeviceMemoryResource): - pass - -cdef class ManagedMemoryResource(DeviceMemoryResource): - pass - -cdef class SystemMemoryResource(DeviceMemoryResource): - pass - -cdef class SamHeadroomMemoryResource(DeviceMemoryResource): - pass - -cdef class CudaAsyncMemoryResource(DeviceMemoryResource): - pass - -cdef class PoolMemoryResource(UpstreamResourceAdaptor): - pass - -cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): - pass - -cdef class BinningMemoryResource(UpstreamResourceAdaptor): - - cdef readonly list _bin_mrs - - cpdef add_bin( - self, - size_t allocation_size, - DeviceMemoryResource bin_resource=*) - -cdef class CallbackMemoryResource(DeviceMemoryResource): - cdef object _allocate_func - cdef object _deallocate_func - -cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): - cdef object _log_file_name - cpdef get_file_name(self) - cpdef flush(self) - -cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): - pass - -cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): - cdef object _callback - -cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): - pass - -cpdef DeviceMemoryResource get_current_device_resource() +from rmm.librmm.memory_resource cimport ( + CppExcept, + allocate_callback_t, + allocation_handle_type, + available_device_memory, + binning_memory_resource, + callback_memory_resource, + cuda_async_memory_resource, + cuda_memory_resource, + deallocate_callback_t, + device_memory_resource, + failure_callback_resource_adaptor, + failure_callback_t, + fixed_size_memory_resource, + limiting_resource_adaptor, + logging_resource_adaptor, + managed_memory_resource, + percent_of_free_device_memory, + pool_memory_resource, + prefetch_resource_adaptor, + sam_headroom_memory_resource, + statistics_resource_adaptor, + system_memory_resource, + throw_cpp_except, + tracking_resource_adaptor, + translate_python_except_to_cpp, +) +from rmm.pylibrmm.memory_resource cimport ( + ArenaMemoryResource, + BinningMemoryResource, + CallbackMemoryResource, + CudaAsyncMemoryResource, + CudaMemoryResource, + DeviceMemoryResource, + FailureCallbackResourceAdaptor, + FixedSizeMemoryResource, + LimitingResourceAdaptor, + LoggingResourceAdaptor, + ManagedMemoryResource, + PoolMemoryResource, + 
PrefetchResourceAdaptor, + SamHeadroomMemoryResource, + StatisticsResourceAdaptor, + SystemMemoryResource, + TrackingResourceAdaptor, + UpstreamResourceAdaptor, + get_current_device_resource, +) diff --git a/python/rmm/rmm/_lib/memory_resource.py b/python/rmm/rmm/_lib/memory_resource.py new file mode 100644 index 000000000..f3a24f635 --- /dev/null +++ b/python/rmm/rmm/_lib/memory_resource.py @@ -0,0 +1,45 @@ +# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.pylibrmm.memory_resource import ( # noqa: F401 + ArenaMemoryResource, + BinningMemoryResource, + CallbackMemoryResource, + CudaAsyncMemoryResource, + CudaMemoryResource, + DeviceMemoryResource, + FailureCallbackResourceAdaptor, + FixedSizeMemoryResource, + LimitingResourceAdaptor, + LoggingResourceAdaptor, + ManagedMemoryResource, + PoolMemoryResource, + PrefetchResourceAdaptor, + SamHeadroomMemoryResource, + StatisticsResourceAdaptor, + SystemMemoryResource, + TrackingResourceAdaptor, + UpstreamResourceAdaptor, + _flush_logs, + available_device_memory, + disable_logging, + enable_logging, + get_current_device_resource, + get_current_device_resource_type, + get_log_filenames, + get_per_device_resource_type, + is_initialized, + set_current_device_resource, + set_per_device_resource, +) diff --git a/python/rmm/rmm/_lib/per_device_resource.pxd b/python/rmm/rmm/_lib/per_device_resource.pxd index c33217622..29487f503 100644 --- a/python/rmm/rmm/_lib/per_device_resource.pxd +++ b/python/rmm/rmm/_lib/per_device_resource.pxd @@ -1,23 +1,21 @@ -from rmm._lib.memory_resource cimport device_memory_resource +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
- -cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: - cdef cppclass cuda_device_id: - ctypedef int value_type - - cuda_device_id(value_type id) - - value_type value() - -cdef extern from "rmm/mr/device/per_device_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef device_memory_resource* set_current_device_resource( - device_memory_resource* new_mr - ) - cdef device_memory_resource* get_current_device_resource() - cdef device_memory_resource* set_per_device_resource( - cuda_device_id id, device_memory_resource* new_mr - ) - cdef device_memory_resource* get_per_device_resource ( - cuda_device_id id - ) +from rmm.librmm.per_device_resource cimport ( + cuda_device_id, + get_current_device_resource, + get_per_device_resource, + set_current_device_resource, + set_per_device_resource, +) diff --git a/python/rmm/rmm/allocators/cupy.py b/python/rmm/rmm/allocators/cupy.py index 89947c46b..780ff2abf 100644 --- a/python/rmm/rmm/allocators/cupy.py +++ b/python/rmm/rmm/allocators/cupy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm import _lib as librmm +from rmm import pylibrmm from rmm._cuda.stream import Stream try: @@ -34,7 +34,7 @@ def rmm_cupy_allocator(nbytes): raise ModuleNotFoundError("No module named 'cupy'") stream = Stream(obj=cupy.cuda.get_current_stream()) - buf = librmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream) + buf = pylibrmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream) dev_id = -1 if buf.ptr else cupy.cuda.device.get_device_id() mem = cupy.cuda.UnownedMemory( ptr=buf.ptr, size=buf.size, owner=buf, device_id=dev_id diff --git a/python/rmm/rmm/allocators/numba.py b/python/rmm/rmm/allocators/numba.py index 5e87b87b6..fd9bacb5a 100644 --- a/python/rmm/rmm/allocators/numba.py +++ b/python/rmm/rmm/allocators/numba.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ from numba import config, cuda from numba.cuda import HostOnlyCUDAMemoryManager, IpcHandle, MemoryPointer -from rmm import _lib as librmm +from rmm import pylibrmm def _make_emm_plugin_finalizer(handle, allocations): @@ -70,7 +70,7 @@ def memalloc(self, size): """ Allocate an on-device array from the RMM pool. """ - buf = librmm.DeviceBuffer(size=size) + buf = pylibrmm.DeviceBuffer(size=size) ctx = self.context if config.CUDA_USE_NVIDIA_BINDING: diff --git a/python/rmm/rmm/allocators/torch.py b/python/rmm/rmm/allocators/torch.py index 753da66da..eee0e9df9 100644 --- a/python/rmm/rmm/allocators/torch.py +++ b/python/rmm/rmm/allocators/torch.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,10 +28,10 @@ # allocator .so relative to the current file because the current file # is pure Python and will therefore be in the source directory. 
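The allocator modules above change only their internal import (`rmm._lib` to `rmm.pylibrmm`); the public entry points are untouched. For instance, the CuPy hookup still works as documented:

```python
import cupy

from rmm.allocators.cupy import rmm_cupy_allocator

cupy.cuda.set_allocator(rmm_cupy_allocator)
arr = cupy.zeros(1_000_000)  # each CuPy allocation is now backed by an RMM DeviceBuffer
```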
# Instead, we search relative to an arbitrary file in the compiled - # package. We use the _lib.lib module because it is small. - from rmm._lib import lib + # package. We use the librmm._logger module because it is small. + from rmm.librmm import _logger - sofile = pathlib.Path(lib.__file__).parent / "_torch_allocator.so" + sofile = pathlib.Path(_logger.__file__).parent / "_torch_allocator.so" rmm_torch_allocator = CUDAPluggableAllocator( str(sofile.absolute()), alloc_fn_name="allocate", diff --git a/python/rmm/rmm/_lib/CMakeLists.txt b/python/rmm/rmm/librmm/CMakeLists.txt similarity index 93% rename from python/rmm/rmm/_lib/CMakeLists.txt rename to python/rmm/rmm/librmm/CMakeLists.txt index 7cdfed971..5da2a1a01 100644 --- a/python/rmm/rmm/_lib/CMakeLists.txt +++ b/python/rmm/rmm/librmm/CMakeLists.txt @@ -12,8 +12,7 @@ # the License. # ============================================================================= -set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx - helper.pyx) +set(cython_sources _logger.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets diff --git a/python/rmm/rmm/_lib/__init__.pxd b/python/rmm/rmm/librmm/__init__.py similarity index 100% rename from python/rmm/rmm/_lib/__init__.pxd rename to python/rmm/rmm/librmm/__init__.py diff --git a/python/rmm/rmm/librmm/_logger.pxd b/python/rmm/rmm/librmm/_logger.pxd new file mode 100644 index 000000000..fb2126b2f --- /dev/null +++ b/python/rmm/rmm/librmm/_logger.pxd @@ -0,0 +1,66 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libcpp cimport bool + + +cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: + cpdef enum logging_level "spdlog::level::level_enum": + """ + The debug logging level for RMM. + + Debug logging prints messages to a log file. See + `Debug Logging `_ + for more information. + + Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, + INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. 
+ + Examples + -------- + >>> import rmm + >>> rmm.logging_level.DEBUG + <logging_level.DEBUG: 1> + >>> rmm.logging_level.DEBUG.value + 1 + >>> rmm.logging_level.DEBUG.name + 'DEBUG' + + See Also + -------- + set_logging_level : Set the debug logging level + get_logging_level : Get the current debug logging level + """ + TRACE "spdlog::level::trace" + DEBUG "spdlog::level::debug" + INFO "spdlog::level::info" + WARN "spdlog::level::warn" + ERR "spdlog::level::err" + CRITICAL "spdlog::level::critical" + OFF "spdlog::level::off" + + +cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: + cdef cppclass spdlog_logger "spdlog::logger": + spdlog_logger() except + + void set_level(logging_level level) + logging_level level() + void flush() except + + void flush_on(logging_level level) + logging_level flush_level() + bool should_log(logging_level msg_level) + + +cdef extern from "rmm/logger.hpp" namespace "rmm::detail" nogil: + cdef spdlog_logger& logger() except + diff --git a/python/rmm/rmm/librmm/_logger.pyx b/python/rmm/rmm/librmm/_logger.pyx new file mode 100644 index 000000000..4392cb106 --- /dev/null +++ b/python/rmm/rmm/librmm/_logger.pyx @@ -0,0 +1,15 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm._logger cimport logging_level # no-cython-lint diff --git a/python/rmm/rmm/_lib/_torch_allocator.cpp b/python/rmm/rmm/librmm/_torch_allocator.cpp similarity index 100% rename from python/rmm/rmm/_lib/_torch_allocator.cpp rename to python/rmm/rmm/librmm/_torch_allocator.cpp diff --git a/python/rmm/rmm/librmm/cuda_stream.pxd b/python/rmm/rmm/librmm/cuda_stream.pxd new file mode 100644 index 000000000..3f2ac3361 --- /dev/null +++ b/python/rmm/rmm/librmm/cuda_stream.pxd @@ -0,0 +1,28 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + + +cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream: + cuda_stream() except + + bool is_valid() except + + cudaStream_t value() except + + cuda_stream_view view() except + + void synchronize() except + + void synchronize_no_throw() diff --git a/python/rmm/rmm/librmm/cuda_stream_pool.pxd b/python/rmm/rmm/librmm/cuda_stream_pool.pxd new file mode 100644 index 000000000..4f2cbb36d --- /dev/null +++ b/python/rmm/rmm/librmm/cuda_stream_pool.pxd @@ -0,0 +1,23 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
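The `logging_level` docstring above uses the top-level re-exports introduced earlier in this diff. A short sketch of the same API from user code:

```python
import rmm

rmm.set_logging_level(rmm.logging_level.DEBUG)
print(rmm.get_logging_level())                 # logging_level.DEBUG
print(rmm.should_log(rmm.logging_level.WARN))  # True: WARN is at least as severe as DEBUG
```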
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + + +cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream_pool: + cuda_stream_pool(size_t pool_size) + cuda_stream_view get_stream() + cuda_stream_view get_stream(size_t stream_id) except + + size_t get_pool_size() diff --git a/python/rmm/rmm/librmm/cuda_stream_view.pxd b/python/rmm/rmm/librmm/cuda_stream_view.pxd new file mode 100644 index 000000000..bf0d33c24 --- /dev/null +++ b/python/rmm/rmm/librmm/cuda_stream_view.pxd @@ -0,0 +1,32 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + + +cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream_view: + cuda_stream_view() + cuda_stream_view(cudaStream_t) + cudaStream_t value() + bool is_default() + bool is_per_thread_default() + void synchronize() except + + + cdef bool operator==(cuda_stream_view const, cuda_stream_view const) + + const cuda_stream_view cuda_stream_default + const cuda_stream_view cuda_stream_legacy + const cuda_stream_view cuda_stream_per_thread diff --git a/python/rmm/rmm/librmm/device_buffer.pxd b/python/rmm/rmm/librmm/device_buffer.pxd new file mode 100644 index 000000000..1c503ac9a --- /dev/null +++ b/python/rmm/rmm/librmm/device_buffer.pxd @@ -0,0 +1,58 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from rmm.librmm.memory_resource cimport device_memory_resource
+
+
+cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil:
+    cdef cppclass cuda_device_id:
+        ctypedef int value_type
+        cuda_device_id()
+        cuda_device_id(value_type id)
+        value_type value()
+
+    cdef cuda_device_id get_current_cuda_device()
+
+cdef extern from "rmm/prefetch.hpp" namespace "rmm" nogil:
+    cdef void prefetch(const void* ptr,
+                       size_t bytes,
+                       cuda_device_id device,
+                       cuda_stream_view stream) except +
+
+cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
+    cdef cppclass device_buffer:
+        device_buffer()
+        device_buffer(
+            size_t size,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        device_buffer(
+            const void* source_data,
+            size_t size,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        device_buffer(
+            const device_buffer buf,
+            cuda_stream_view stream,
+            device_memory_resource *
+        ) except +
+        void reserve(size_t new_capacity, cuda_stream_view stream) except +
+        void resize(size_t new_size, cuda_stream_view stream) except +
+        void shrink_to_fit(cuda_stream_view stream) except +
+        void* data()
+        size_t size()
+        size_t capacity()
diff --git a/python/rmm/rmm/librmm/device_uvector.pxd b/python/rmm/rmm/librmm/device_uvector.pxd
new file mode 100644
index 000000000..f560a9e38
--- /dev/null
+++ b/python/rmm/rmm/librmm/device_uvector.pxd
@@ -0,0 +1,39 @@
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+from rmm.librmm.device_buffer cimport device_buffer
+from rmm.librmm.memory_resource cimport device_memory_resource
+
+
+cdef extern from "rmm/device_uvector.hpp" namespace "rmm" nogil:
+    cdef cppclass device_uvector[T]:
+        device_uvector(size_t size, cuda_stream_view stream) except +
+        T* element_ptr(size_t index)
+        void set_element(size_t element_index, const T& v, cuda_stream_view s)
+        void set_element_async(
+            size_t element_index,
+            const T& v,
+            cuda_stream_view s
+        ) except +
+        T front_element(cuda_stream_view s) except +
+        T back_element(cuda_stream_view s) except +
+        void reserve(size_t new_capacity, cuda_stream_view stream) except +
+        void resize(size_t new_size, cuda_stream_view stream) except +
+        void shrink_to_fit(cuda_stream_view stream) except +
+        device_buffer release()
+        size_t capacity()
+        T* data()
+        size_t size()
+        device_memory_resource* memory_resource()
diff --git a/python/rmm/rmm/librmm/memory_resource.pxd b/python/rmm/rmm/librmm/memory_resource.pxd
new file mode 100644
index 000000000..9e7b70c4f
--- /dev/null
+++ b/python/rmm/rmm/librmm/memory_resource.pxd
@@ -0,0 +1,239 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This import is needed for Cython typing in translate_python_except_to_cpp
+# See https://github.com/cython/cython/issues/5589
+from builtins import BaseException
+
+from libc.stddef cimport size_t
+from libc.stdint cimport int8_t, int64_t
+from libcpp cimport bool
+from libcpp.optional cimport optional
+from libcpp.pair cimport pair
+from libcpp.string cimport string
+
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+
+
+cdef extern from "rmm/mr/device/device_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass device_memory_resource:
+        void* allocate(size_t bytes) except +
+        void* allocate(size_t bytes, cuda_stream_view stream) except +
+        void deallocate(void* ptr, size_t bytes) except +
+        void deallocate(
+            void* ptr,
+            size_t bytes,
+            cuda_stream_view stream
+        ) except +
+
+cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil:
+    size_t percent_of_free_device_memory(int percent) except +
+    pair[size_t, size_t] available_device_memory() except +
+
+
+# Transparent handle of a C++ exception
+ctypedef pair[int, string] CppExcept
+
+cdef inline CppExcept translate_python_except_to_cpp(err: BaseException) noexcept:
+    """Translate a Python exception into a C++ exception handle
+
+    The returned exception handle can then be thrown by `throw_cpp_except()`,
+    which MUST be done without holding the GIL.
+
+    This is useful when C++ calls a Python function and needs to catch or
+    propagate exceptions.
+    """
+    if isinstance(err, MemoryError):
+        return CppExcept(0, str.encode(str(err)))
+    return CppExcept(-1, str.encode(str(err)))
+
+# Implementation of `throw_cpp_except()`, which throws a given `CppExcept`.
+# This function MUST be called without the GIL, otherwise the thrown C++
+# exceptions are translated back into Python exceptions.
+cdef extern from *:
+    """
+    #include <rmm/error.hpp>
+    #include <stdexcept>
+
+    void throw_cpp_except(std::pair<int, std::string> res) {
+        switch(res.first) {
+            case 0:
+                throw rmm::out_of_memory(res.second);
+            default:
+                throw std::runtime_error(res.second);
+        }
+    }
+    """
+    void throw_cpp_except(CppExcept) nogil
+
+
+cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass cuda_memory_resource(device_memory_resource):
+        cuda_memory_resource() except +
+
+cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass managed_memory_resource(device_memory_resource):
+        managed_memory_resource() except +
+
+cdef extern from "rmm/mr/device/system_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass system_memory_resource(device_memory_resource):
+        system_memory_resource() except +
+
+cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass sam_headroom_memory_resource(device_memory_resource):
+        sam_headroom_memory_resource(size_t headroom) except +
+
+cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+
+    cdef cppclass cuda_async_memory_resource(device_memory_resource):
+        cuda_async_memory_resource(
+            optional[size_t] initial_pool_size,
+            optional[size_t] release_threshold,
+            optional[allocation_handle_type] export_handle_type) except +
+
+# TODO: when we adopt Cython 3.0 use enum class
+cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
+        namespace \
+        "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \
+        nogil:
+    enum allocation_handle_type \
+            "rmm::mr::cuda_async_memory_resource::allocation_handle_type":
+        none
+        posix_file_descriptor
+        win32
+        win32_kmt
+
+
+cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass pool_memory_resource[Upstream](device_memory_resource):
+        pool_memory_resource(
+            Upstream* upstream_mr,
+            size_t initial_pool_size,
+            optional[size_t] maximum_pool_size) except +
+        size_t pool_size()
+
+cdef extern from "rmm/mr/device/arena_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass arena_memory_resource[Upstream](device_memory_resource):
+        arena_memory_resource(
+            Upstream* upstream_mr,
+            optional[size_t] arena_size,
+            bool dump_log_on_failure
+        ) except +
+
+cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource):
+        fixed_size_memory_resource(
+            Upstream* upstream_mr,
+            size_t block_size,
+            size_t block_to_preallocate) except +
+
+cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*)
+    ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*)
+
+    cdef cppclass callback_memory_resource(device_memory_resource):
+        callback_memory_resource(
+            allocate_callback_t allocate_callback,
+            deallocate_callback_t deallocate_callback,
+            void* allocate_callback_arg,
+            void* deallocate_callback_arg
+        ) except +
+
+cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass binning_memory_resource[Upstream](device_memory_resource):
+        binning_memory_resource(Upstream* upstream_mr) except +
+        binning_memory_resource(
+            Upstream* upstream_mr,
+            int8_t min_size_exponent,
+            int8_t max_size_exponent) except +
+
+        void add_bin(size_t allocation_size)
except + + void add_bin( + size_t allocation_size, + device_memory_resource* bin_resource) except + + +cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource): + limiting_resource_adaptor( + Upstream* upstream_mr, + size_t allocation_limit) except + + + size_t get_allocated_bytes() except + + size_t get_allocation_limit() except + + +cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): + logging_resource_adaptor( + Upstream* upstream_mr, + string filename) except + + + void flush() except + + +cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource): + struct counter: + counter() + + int64_t value + int64_t peak + int64_t total + + statistics_resource_adaptor(Upstream* upstream_mr) except + + + counter get_bytes_counter() except + + counter get_allocations_counter() except + + pair[counter, counter] pop_counters() except + + pair[counter, counter] push_counters() except + + +cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): + tracking_resource_adaptor( + Upstream* upstream_mr, + bool capture_stacks) except + + + size_t get_allocated_bytes() except + + string get_outstanding_allocations_str() except + + void log_outstanding_allocations() except + + +cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + ctypedef bool (*failure_callback_t)(size_t, void*) + cdef cppclass failure_callback_resource_adaptor[Upstream]( + device_memory_resource + ): + failure_callback_resource_adaptor( + Upstream* upstream_mr, + failure_callback_t callback, + void* callback_arg + ) except + + +cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource): + prefetch_resource_adaptor(Upstream* upstream_mr) except + diff --git a/python/rmm/rmm/librmm/per_device_resource.pxd b/python/rmm/rmm/librmm/per_device_resource.pxd new file mode 100644 index 000000000..63ee29056 --- /dev/null +++ b/python/rmm/rmm/librmm/per_device_resource.pxd @@ -0,0 +1,36 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
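Each resource adaptor declared in memory_resource.pxd above wraps an upstream resource and layers one behavior on top of it (allocation limits, logging, statistics, stack tracking, out-of-memory callbacks, prefetching). A hedged sketch of the failure-callback adaptor through the public Python API, mirroring the pattern this PR uses in its tests:

    import rmm

    def on_oom(nbytes: int) -> bool:
        # Returning True asks RMM to retry the allocation; False re-raises
        print(f"allocation of {nbytes} bytes failed")
        return False

    mr = rmm.mr.FailureCallbackResourceAdaptor(rmm.mr.CudaMemoryResource(), on_oom)
    rmm.mr.set_current_device_resource(mr)
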
+from rmm.librmm.memory_resource cimport device_memory_resource + + +cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: + cdef cppclass cuda_device_id: + ctypedef int value_type + + cuda_device_id(value_type id) + + value_type value() + +cdef extern from "rmm/mr/device/per_device_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef device_memory_resource* set_current_device_resource( + device_memory_resource* new_mr + ) + cdef device_memory_resource* get_current_device_resource() + cdef device_memory_resource* set_per_device_resource( + cuda_device_id id, device_memory_resource* new_mr + ) + cdef device_memory_resource* get_per_device_resource ( + cuda_device_id id + ) diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index 6eb94da0f..82729271f 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from rmm._lib.memory_resource import ( +from rmm.pylibrmm.memory_resource import ( + ArenaMemoryResource, BinningMemoryResource, CallbackMemoryResource, CudaAsyncMemoryResource, @@ -45,6 +46,7 @@ ) __all__ = [ + "ArenaMemoryResource", "BinningMemoryResource", "CallbackMemoryResource", "CudaAsyncMemoryResource", diff --git a/python/rmm/rmm/pylibrmm/CMakeLists.txt b/python/rmm/rmm/pylibrmm/CMakeLists.txt new file mode 100644 index 000000000..0e88f01bb --- /dev/null +++ b/python/rmm/rmm/pylibrmm/CMakeLists.txt @@ -0,0 +1,27 @@ +# ============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources device_buffer.pyx logger.pyx memory_resource.pyx cuda_stream.pyx helper.pyx) +set(linked_libraries rmm::rmm) + +# Build all of the Cython targets +rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + CXX) + +# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols +# loaded from other DSOs +foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden) +endforeach() diff --git a/python/rmm/rmm/pylibrmm/__init__.py b/python/rmm/rmm/pylibrmm/__init__.py new file mode 100644 index 000000000..0b8672ef6 --- /dev/null +++ b/python/rmm/rmm/pylibrmm/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .device_buffer import DeviceBuffer diff --git a/python/rmm/rmm/pylibrmm/cuda_stream.pxd b/python/rmm/rmm/pylibrmm/cuda_stream.pxd new file mode 100644 index 000000000..dd38387c2 --- /dev/null +++ b/python/rmm/rmm/pylibrmm/cuda_stream.pxd @@ -0,0 +1,27 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cimport cython +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool +from libcpp.memory cimport unique_ptr + +from rmm.librmm.cuda_stream cimport cuda_stream + + +@cython.final +cdef class CudaStream: + cdef unique_ptr[cuda_stream] c_obj + cdef cudaStream_t value(self) except * nogil + cdef bool is_valid(self) except * nogil diff --git a/python/rmm/rmm/_lib/cuda_stream.pyx b/python/rmm/rmm/pylibrmm/cuda_stream.pyx similarity index 91% rename from python/rmm/rmm/_lib/cuda_stream.pyx rename to python/rmm/rmm/pylibrmm/cuda_stream.pyx index 0861f0663..d6aa4edc7 100644 --- a/python/rmm/rmm/_lib/cuda_stream.pyx +++ b/python/rmm/rmm/pylibrmm/cuda_stream.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ cimport cython from cuda.ccudart cimport cudaStream_t from libcpp cimport bool +from rmm.librmm.cuda_stream cimport cuda_stream + @cython.final cdef class CudaStream: diff --git a/python/rmm/rmm/pylibrmm/device_buffer.pxd b/python/rmm/rmm/pylibrmm/device_buffer.pxd new file mode 100644 index 000000000..a0d287423 --- /dev/null +++ b/python/rmm/rmm/pylibrmm/device_buffer.pxd @@ -0,0 +1,71 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
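pylibrmm's `DeviceBuffer`, declared next, is the user-facing owner of a C++ `device_buffer`. A quick usage sketch consistent with the doctests later in this diff:

    import rmm

    db = rmm.DeviceBuffer.to_device(b"abc")  # copy host bytes to device
    assert bytes(db) == b"abc"
    host = db.copy_to_host()                 # returns a uint8 numpy array
    db.resize(5)                             # existing contents are preserved
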
+ +from libc.stdint cimport uintptr_t +from libcpp.memory cimport unique_ptr + +from rmm._cuda.stream cimport Stream +from rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource + + +cdef class DeviceBuffer: + cdef unique_ptr[device_buffer] c_obj + + # Holds a reference to the DeviceMemoryResource used for allocation. + # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is + # needed for deallocation + cdef DeviceMemoryResource mr + + # Holds a reference to the stream used by the underlying `device_buffer`. + # Ensures the stream does not get destroyed before this DeviceBuffer + cdef Stream stream + + @staticmethod + cdef DeviceBuffer c_from_unique_ptr( + unique_ptr[device_buffer] ptr, + Stream stream=*, + DeviceMemoryResource mr=*, + ) + + @staticmethod + cdef DeviceBuffer c_to_device(const unsigned char[::1] b, + Stream stream=*) except * + cpdef copy_to_host(self, ary=*, Stream stream=*) + cpdef copy_from_host(self, ary, Stream stream=*) + cpdef copy_from_device(self, cuda_ary, Stream stream=*) + cpdef bytes tobytes(self, Stream stream=*) + + cdef size_t c_size(self) except * + cpdef void reserve(self, size_t new_capacity, Stream stream=*) except * + cpdef void resize(self, size_t new_size, Stream stream=*) except * + cpdef size_t capacity(self) except * + cdef void* c_data(self) except * + + cdef device_buffer c_release(self) except * + +cpdef DeviceBuffer to_device(const unsigned char[::1] b, + Stream stream=*) +cpdef void copy_ptr_to_host(uintptr_t db, + unsigned char[::1] hb, + Stream stream=*) except * + +cpdef void copy_host_to_ptr(const unsigned char[::1] hb, + uintptr_t db, + Stream stream=*) except * + +cpdef void copy_device_to_ptr(uintptr_t d_src, + uintptr_t d_dst, + size_t count, + Stream stream=*) except * diff --git a/python/rmm/rmm/_lib/device_buffer.pyx b/python/rmm/rmm/pylibrmm/device_buffer.pyx similarity index 96% rename from python/rmm/rmm/_lib/device_buffer.pyx rename to python/rmm/rmm/pylibrmm/device_buffer.pyx index 94a4dc771..c2e95e845 100644 --- a/python/rmm/rmm/_lib/device_buffer.pyx +++ b/python/rmm/rmm/pylibrmm/device_buffer.pyx @@ -32,9 +32,16 @@ from cuda.ccudart cimport ( cudaStream_t, ) -from rmm._lib.memory_resource cimport ( +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.device_buffer cimport ( + cuda_device_id, + device_buffer, + get_current_cuda_device, + prefetch, +) +from rmm.librmm.memory_resource cimport device_memory_resource +from rmm.pylibrmm.memory_resource cimport ( DeviceMemoryResource, - device_memory_resource, get_current_device_resource, ) @@ -149,7 +156,7 @@ cdef class DeviceBuffer: device : optional The CUDA device to which to prefetch the memory for this buffer. Defaults to the current CUDA device. To prefetch to the CPU, pass - :py:attr:`~cuda.cudart.cudaCpuDeviceId` as the device. + :py:attr:`~cuda.bindings.runtime.cudaCpuDeviceId` as the device. stream : optional CUDA stream to use for prefetching. 
Defaults to self.stream
         """
@@ -394,7 +401,7 @@ cpdef DeviceBuffer to_device(const unsigned char[::1] b,
     Examples
     --------
     >>> import rmm
-    >>> db = rmm._lib.device_buffer.to_device(b"abc")
+    >>> db = rmm.pylibrmm.device_buffer.to_device(b"abc")
     >>> print(bytes(db))
     b'abc'
     """
@@ -460,7 +467,7 @@ cpdef void copy_ptr_to_host(uintptr_t db,
     >>> import rmm
     >>> db = rmm.DeviceBuffer.to_device(b"abc")
     >>> hb = bytearray(db.nbytes)
-    >>> rmm._lib.device_buffer.copy_ptr_to_host(db.ptr, hb)
+    >>> rmm.pylibrmm.device_buffer.copy_ptr_to_host(db.ptr, hb)
     >>> print(hb)
     bytearray(b'abc')
     """
@@ -502,7 +509,7 @@ cpdef void copy_host_to_ptr(const unsigned char[::1] hb,
     >>> import rmm
     >>> db = rmm.DeviceBuffer(size=10)
     >>> hb = b"abc"
-    >>> rmm._lib.device_buffer.copy_host_to_ptr(hb, db.ptr)
+    >>> rmm.pylibrmm.device_buffer.copy_host_to_ptr(hb, db.ptr)
     >>> hb = db.copy_to_host()
     >>> print(hb)
     array([97, 98, 99,  0,  0,  0,  0,  0,  0,  0], dtype=uint8)
@@ -541,7 +548,7 @@ cpdef void copy_device_to_ptr(uintptr_t d_src,
     >>> import rmm
     >>> db = rmm.DeviceBuffer(size=5)
     >>> db2 = rmm.DeviceBuffer.to_device(b"abc")
-    >>> rmm._lib.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size)
+    >>> rmm.pylibrmm.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size)
     >>> hb = db.copy_to_host()
     >>> hb
     array([97, 98, 99,  0,  0], dtype=uint8)
diff --git a/python/rmm/rmm/_lib/lib.pyx b/python/rmm/rmm/pylibrmm/helper.pxd
similarity index 86%
rename from python/rmm/rmm/_lib/lib.pyx
rename to python/rmm/rmm/pylibrmm/helper.pxd
index 46753baa3..8ca151c00 100644
--- a/python/rmm/rmm/_lib/lib.pyx
+++ b/python/rmm/rmm/pylibrmm/helper.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,3 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+
+cdef object parse_bytes(object s) except *
diff --git a/python/rmm/rmm/_lib/helper.pyx b/python/rmm/rmm/pylibrmm/helper.pyx
similarity index 100%
rename from python/rmm/rmm/_lib/helper.pyx
rename to python/rmm/rmm/pylibrmm/helper.pyx
diff --git a/python/rmm/rmm/_lib/logger.pyx b/python/rmm/rmm/pylibrmm/logger.pyx
similarity index 77%
rename from python/rmm/rmm/_lib/logger.pyx
rename to python/rmm/rmm/pylibrmm/logger.pyx
index 029bbdd79..119e1c92f 100644
--- a/python/rmm/rmm/_lib/logger.pyx
+++ b/python/rmm/rmm/pylibrmm/logger.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,58 +14,9 @@
 
 import warnings
 
-from libcpp cimport bool
-
-
-cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil:
-    cpdef enum logging_level "spdlog::level::level_enum":
-        """
-        The debug logging level for RMM.
-
-        Debug logging prints messages to a log file. See
-        `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
-        for more information.
-
-        Valid levels, in decreasing order of verbosity, are TRACE, DEBUG,
-        INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO.
-
-        Examples
-        --------
-        >>> import rmm
-        >>> rmm.logging_level.DEBUG
-        <logging_level.DEBUG: 1>
-        >>> rmm.logging_level.DEBUG.value
-        1
-        >>> rmm.logging_level.DEBUG.name
-        'DEBUG'
-
-        See Also
-        --------
-        set_logging_level : Set the debug logging level
-        get_logging_level : Get the current debug logging level
-        """
-        TRACE "spdlog::level::trace"
-        DEBUG "spdlog::level::debug"
-        INFO "spdlog::level::info"
-        WARN "spdlog::level::warn"
-        ERR "spdlog::level::err"
-        CRITICAL "spdlog::level::critical"
-        OFF "spdlog::level::off"
-
-
-cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil:
-    cdef cppclass spdlog_logger "spdlog::logger":
-        spdlog_logger() except +
-        void set_level(logging_level level)
-        logging_level level()
-        void flush() except +
-        void flush_on(logging_level level)
-        logging_level flush_level()
-        bool should_log(logging_level msg_level)
-
-
-cdef extern from "rmm/logger.hpp" namespace "rmm" nogil:
-    cdef spdlog_logger& logger() except +
+from rmm.librmm._logger cimport logger
+
+from rmm.librmm._logger import logging_level
 
 
 def _validate_level_type(level):
diff --git a/python/rmm/rmm/pylibrmm/memory_resource.pxd b/python/rmm/rmm/pylibrmm/memory_resource.pxd
new file mode 100644
index 000000000..d1e5610db
--- /dev/null
+++ b/python/rmm/rmm/pylibrmm/memory_resource.pxd
@@ -0,0 +1,86 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
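The Python-level resource classes declared in this new pxd, including the `ArenaMemoryResource` this PR adds, all follow one pattern: construct with an upstream resource, then install via `set_current_device_resource`. A sketch of the arena resource (the `"512MiB"` string form relies on the same `parse_bytes` helper used by `__cinit__` later in this diff; treat that spelling as an assumption):

    import rmm

    upstream = rmm.mr.CudaMemoryResource()
    # arena_size is optional; it defaults to half the available device memory
    mr = rmm.mr.ArenaMemoryResource(upstream, arena_size="512MiB")
    rmm.mr.set_current_device_resource(mr)
    buf = rmm.DeviceBuffer(size=1 << 20)  # now served from the arena
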
+ +from libcpp.memory cimport shared_ptr + +from rmm.librmm.memory_resource cimport device_memory_resource + + +cdef class DeviceMemoryResource: + cdef shared_ptr[device_memory_resource] c_obj + cdef device_memory_resource* get_mr(self) noexcept nogil + +cdef class UpstreamResourceAdaptor(DeviceMemoryResource): + cdef readonly DeviceMemoryResource upstream_mr + + cpdef DeviceMemoryResource get_upstream(self) + +cdef class ArenaMemoryResource(UpstreamResourceAdaptor): + pass + +cdef class CudaMemoryResource(DeviceMemoryResource): + pass + +cdef class ManagedMemoryResource(DeviceMemoryResource): + pass + +cdef class SystemMemoryResource(DeviceMemoryResource): + pass + +cdef class SamHeadroomMemoryResource(DeviceMemoryResource): + pass + +cdef class CudaAsyncMemoryResource(DeviceMemoryResource): + pass + +cdef class PoolMemoryResource(UpstreamResourceAdaptor): + pass + +cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): + pass + +cdef class BinningMemoryResource(UpstreamResourceAdaptor): + + cdef readonly list _bin_mrs + + cpdef add_bin( + self, + size_t allocation_size, + DeviceMemoryResource bin_resource=*) + +cdef class CallbackMemoryResource(DeviceMemoryResource): + cdef object _allocate_func + cdef object _deallocate_func + +cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): + cdef object _log_file_name + cpdef get_file_name(self) + cpdef flush(self) + +cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): + pass + +cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): + cdef object _callback + +cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): + pass + +cpdef DeviceMemoryResource get_current_device_resource() diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/pylibrmm/memory_resource.pyx similarity index 82% rename from python/rmm/rmm/_lib/memory_resource.pyx rename to python/rmm/rmm/pylibrmm/memory_resource.pyx index 231253e3f..b41890fca 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/pylibrmm/memory_resource.pyx @@ -22,12 +22,11 @@ from collections import defaultdict cimport cython from cython.operator cimport dereference as deref from libc.stddef cimport size_t -from libc.stdint cimport int8_t, int64_t, uintptr_t +from libc.stdint cimport int8_t, uintptr_t from libcpp cimport bool from libcpp.memory cimport make_unique, unique_ptr from libcpp.optional cimport optional from libcpp.pair cimport pair -from libcpp.string cimport string from cuda.cudart import cudaError_t @@ -37,206 +36,44 @@ from rmm._cuda.stream cimport Stream from rmm._cuda.stream import DEFAULT_STREAM -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.helper cimport parse_bytes -from rmm._lib.memory_resource cimport ( - available_device_memory as c_available_device_memory, - percent_of_free_device_memory as c_percent_of_free_device_memory, -) -from rmm._lib.per_device_resource cimport ( +from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.per_device_resource cimport ( cuda_device_id, set_per_device_resource as cpp_set_per_device_resource, ) +from rmm.pylibrmm.helper cimport parse_bytes from rmm.statistics import Statistics -# Transparent handle of a C++ exception -ctypedef pair[int, string] CppExcept - -cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept: - """Translate a Python exception into a C++ exception handle 
-
-    The returned exception handle can then be thrown by `throw_cpp_except()`,
-    which MUST be done without holding the GIL.
-
-    This is useful when C++ calls a Python function and needs to catch or
-    propagate exceptions.
-    """
-    if isinstance(err, MemoryError):
-        return CppExcept(0, str.encode(str(err)))
-    return CppExcept(-1, str.encode(str(err)))
-
-# Implementation of `throw_cpp_except()`, which throws a given `CppExcept`.
-# This function MUST be called without the GIL otherwise the thrown C++
-# exception are translated back into a Python exception.
-cdef extern from *:
-    """
-    #include <rmm/error.hpp>
-    #include <stdexcept>
-
-    void throw_cpp_except(std::pair<int, std::string> res) {
-        switch(res.first) {
-            case 0:
-                throw rmm::out_of_memory(res.second);
-            default:
-                throw std::runtime_error(res.second);
-        }
-    }
-    """
-    void throw_cpp_except(CppExcept) nogil
-
-
-# NOTE: Keep extern declarations in .pyx file as much as possible to avoid
-# leaking dependencies when importing RMM Cython .pxd files
-cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    cdef cppclass cuda_memory_resource(device_memory_resource):
-        cuda_memory_resource() except +
-
-cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    cdef cppclass managed_memory_resource(device_memory_resource):
-        managed_memory_resource() except +
-
-cdef extern from "rmm/mr/device/system_memory_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    cdef cppclass system_memory_resource(device_memory_resource):
-        system_memory_resource() except +
-
-cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    cdef cppclass sam_headroom_memory_resource(device_memory_resource):
-        sam_headroom_memory_resource(size_t headroom) except +
-
-cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
-        namespace "rmm::mr" nogil:
-
-    cdef cppclass cuda_async_memory_resource(device_memory_resource):
-        cuda_async_memory_resource(
-            optional[size_t] initial_pool_size,
-            optional[size_t] release_threshold,
-            optional[allocation_handle_type] export_handle_type) except +
-
-# TODO: when we adopt Cython 3.0 use enum class
-cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
-        namespace \
-        "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \
-        nogil:
-    enum allocation_handle_type \
-            "rmm::mr::cuda_async_memory_resource::allocation_handle_type":
-        none
-        posix_file_descriptor
-        win32
-        win32_kmt
-
-
-cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    cdef cppclass pool_memory_resource[Upstream](device_memory_resource):
-        pool_memory_resource(
-            Upstream* upstream_mr,
-            size_t initial_pool_size,
-            optional[size_t] maximum_pool_size) except +
-        size_t pool_size()
-
-cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource):
-        fixed_size_memory_resource(
-            Upstream* upstream_mr,
-            size_t block_size,
-            size_t block_to_preallocate) except +
-
-cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \
-        namespace "rmm::mr" nogil:
-    ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*)
-    ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*)
-
-    cdef cppclass callback_memory_resource(device_memory_resource):
-        callback_memory_resource(
-            allocate_callback_t allocate_callback,
-            deallocate_callback_t deallocate_callback,
-            void* allocate_callback_arg,
-            void*
deallocate_callback_arg - ) except + - -cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass binning_memory_resource[Upstream](device_memory_resource): - binning_memory_resource(Upstream* upstream_mr) except + - binning_memory_resource( - Upstream* upstream_mr, - int8_t min_size_exponent, - int8_t max_size_exponent) except + - - void add_bin(size_t allocation_size) except + - void add_bin( - size_t allocation_size, - device_memory_resource* bin_resource) except + - -cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource): - limiting_resource_adaptor( - Upstream* upstream_mr, - size_t allocation_limit) except + - - size_t get_allocated_bytes() except + - size_t get_allocation_limit() except + - -cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): - logging_resource_adaptor( - Upstream* upstream_mr, - string filename) except + - - void flush() except + - -cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource): - struct counter: - counter() - - int64_t value - int64_t peak - int64_t total - - statistics_resource_adaptor(Upstream* upstream_mr) except + - - counter get_bytes_counter() except + - counter get_allocations_counter() except + - pair[counter, counter] pop_counters() except + - pair[counter, counter] push_counters() except + - -cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): - tracking_resource_adaptor( - Upstream* upstream_mr, - bool capture_stacks) except + - - size_t get_allocated_bytes() except + - string get_outstanding_allocations_str() except + - void log_outstanding_allocations() except + - -cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - ctypedef bool (*failure_callback_t)(size_t, void*) - cdef cppclass failure_callback_resource_adaptor[Upstream]( - device_memory_resource - ): - failure_callback_resource_adaptor( - Upstream* upstream_mr, - failure_callback_t callback, - void* callback_arg - ) except + - -cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource): - prefetch_resource_adaptor(Upstream* upstream_mr) except + +from rmm.librmm.memory_resource cimport ( + CppExcept, + allocate_callback_t, + allocation_handle_type, + arena_memory_resource, + available_device_memory as c_available_device_memory, + binning_memory_resource, + callback_memory_resource, + cuda_async_memory_resource, + cuda_memory_resource, + deallocate_callback_t, + device_memory_resource, + failure_callback_resource_adaptor, + failure_callback_t, + fixed_size_memory_resource, + limiting_resource_adaptor, + logging_resource_adaptor, + managed_memory_resource, + percent_of_free_device_memory as c_percent_of_free_device_memory, + pool_memory_resource, + posix_file_descriptor, + prefetch_resource_adaptor, + sam_headroom_memory_resource, + statistics_resource_adaptor, + system_memory_resource, + throw_cpp_except, + tracking_resource_adaptor, + translate_python_except_to_cpp, +) cdef class DeviceMemoryResource: @@ -474,6 
+311,48 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor):
         )
         return c_mr.pool_size()
+
+cdef class ArenaMemoryResource(UpstreamResourceAdaptor):
+    def __cinit__(
+        self, DeviceMemoryResource upstream_mr,
+        arena_size=None,
+        dump_log_on_failure=False
+    ):
+        cdef optional[size_t] c_arena_size = (
+            optional[size_t]() if
+            arena_size is None
+            else optional[size_t](<size_t> parse_bytes(arena_size))
+        )
+        self.c_obj.reset(
+            new arena_memory_resource[device_memory_resource](
+                upstream_mr.get_mr(),
+                c_arena_size,
+                dump_log_on_failure,
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        object arena_size=None,
+        bool dump_log_on_failure=False
+    ):
+        """
+        A suballocator that emphasizes fragmentation avoidance and scalable concurrency
+        support.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The DeviceMemoryResource from which to allocate memory for arenas.
+        arena_size : int, optional
+            Size in bytes of the global arena. Defaults to half of the available memory
+            on the current device.
+        dump_log_on_failure : bool, optional
+            Whether to dump the arena on allocation failure.
+        """
+        pass
+
+
 cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor):
     def __cinit__(
         self,
diff --git a/python/rmm/rmm/_lib/tests/__init__.py b/python/rmm/rmm/pylibrmm/tests/__init__.py
similarity index 100%
rename from python/rmm/rmm/_lib/tests/__init__.py
rename to python/rmm/rmm/pylibrmm/tests/__init__.py
diff --git a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx b/python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx
similarity index 83%
rename from python/rmm/rmm/_lib/tests/test_device_buffer.pyx
rename to python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx
index 733383827..ec2ff4def 100644
--- a/python/rmm/rmm/_lib/tests/test_device_buffer.pyx
+++ b/python/rmm/rmm/pylibrmm/tests/test_device_buffer.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,8 +16,9 @@ import numpy as np
 
 from libcpp.memory cimport make_unique
 
-from rmm._lib.cuda_stream_view cimport cuda_stream_default
-from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
+from rmm.librmm.cuda_stream_view cimport cuda_stream_default
+from rmm.librmm.device_buffer cimport device_buffer
+from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 
 
 def test_release():
diff --git a/python/rmm/rmm/statistics.py b/python/rmm/rmm/statistics.py
index 279e45dc6..2dabedce6 100644
--- a/python/rmm/rmm/statistics.py
+++ b/python/rmm/rmm/statistics.py
@@ -74,8 +74,8 @@ def enable_statistics() -> None:
 def get_statistics() -> Optional[Statistics]:
     """Get the current allocation statistics.
 
-    Return
-    ------
+    Returns
+    -------
     If enabled, returns the current tracked statistics.
     If disabled, returns None.
     """
@@ -94,8 +94,8 @@ def push_statistics() -> Optional[Statistics]:
     If statistics are disabled (the current memory resource is not an
     instance of StatisticsResourceAdaptor), this function is a no-op.
 
-    Return
-    ------
+    Returns
+    -------
     If enabled, returns the current tracked statistics _before_ the pop.
     If disabled, returns None.
     """
@@ -114,8 +114,8 @@ def pop_statistics() -> Optional[Statistics]:
     If statistics are disabled (the current memory resource is not an
     instance of StatisticsResourceAdaptor), this function is a no-op.
 
-    Return
-    ------
+    Returns
+    -------
     If enabled, returns the popped counters.
If disabled, returns None. """ @@ -232,8 +232,8 @@ def report( ordered_by Sort the statistics by this attribute. - Return - ------ + Returns + ------- The pretty formatted string of the memory statistics """ @@ -279,8 +279,8 @@ def _get_descriptive_name_of_object(obj: object) -> str: obj Object in question - Return - ------ + Returns + ------- A string including filename, line number, and object name. """ diff --git a/python/rmm/rmm/tests/test_cython.py b/python/rmm/rmm/tests/test_cython.py index 82eba2451..5df933435 100644 --- a/python/rmm/rmm/tests/test_cython.py +++ b/python/rmm/rmm/tests/test_cython.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ def wrapped(*args, **kwargs): return wrapped -cython_test_modules = ["rmm._lib.tests.test_device_buffer"] +cython_test_modules = ["rmm.pylibrmm.tests.test_device_buffer"] for mod in cython_test_modules: diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index c88d21b38..182434dc5 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -32,12 +32,6 @@ cuda.set_memory_manager(RMMNumbaManager) -_driver_version = rmm._cuda.gpu.driverGetVersion() -_runtime_version = rmm._cuda.gpu.runtimeGetVersion() -_CUDAMALLOC_ASYNC_SUPPORTED = (_driver_version >= 11020) and ( - _runtime_version >= 11020 -) - _SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute( cudart.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess, rmm._cuda.gpu.getDevice(), @@ -354,7 +348,7 @@ def test_rmm_pool_numba_stream(stream): rmm.reinitialize(pool_allocator=True) stream = rmm._cuda.stream.Stream(stream) - a = rmm._lib.device_buffer.DeviceBuffer(size=3, stream=stream) + a = rmm.pylibrmm.device_buffer.DeviceBuffer(size=3, stream=stream) assert a.size == 3 assert a.ptr != 0 @@ -505,6 +499,28 @@ def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr): array_tester(dtype, nelem, alloc) +@pytest.mark.parametrize("dtype", _dtypes) +@pytest.mark.parametrize("nelem", _nelems) +@pytest.mark.parametrize("alloc", _allocs) +@pytest.mark.parametrize( + "upstream_mr", + [ + lambda: rmm.mr.CudaMemoryResource(), + lambda: rmm.mr.ManagedMemoryResource(), + lambda: rmm.mr.PoolMemoryResource( + rmm.mr.CudaMemoryResource(), 1 << 20 + ), + ], +) +def test_arena_memory_resource(dtype, nelem, alloc, upstream_mr): + upstream = upstream_mr() + mr = rmm.mr.ArenaMemoryResource(upstream) + + rmm.mr.set_current_device_resource(mr) + assert rmm.mr.get_current_device_resource_type() is type(mr) + array_tester(dtype, nelem, alloc) + + def test_reinitialize_max_pool_size(): rmm.reinitialize( pool_allocator=True, initial_pool_size=0, maximum_pool_size="8MiB" @@ -635,10 +651,6 @@ def test_mr_upstream_lifetime(): del pool_mr -@pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", -) @pytest.mark.parametrize("dtype", _dtypes) @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) @@ -649,15 +661,11 @@ def test_cuda_async_memory_resource(dtype, nelem, alloc): array_tester(dtype, nelem, alloc) -@pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", -) def test_cuda_async_memory_resource_ipc(): # TODO: We don't have a great way to check if IPC is supported in Python, # without using the C++ function - # 
rmm::detail::async_alloc::is_export_handle_type_supported. We can't - # accurately test driver and runtime versions for this via Python because + # rmm::detail::runtime_async_alloc::is_export_handle_type_supported. + # We can't accurately test this via Python because # cuda-python always has the IPC handle enum defined (which normally # requires a CUDA 11.3 runtime) and the cuda-compat package in Docker # containers prevents us from assuming that the driver we see actually @@ -680,10 +688,6 @@ def test_cuda_async_memory_resource_ipc(): assert rmm.mr.get_current_device_resource_type() is type(mr) -@pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", -) @pytest.mark.parametrize("nelems", _nelems) def test_cuda_async_memory_resource_stream(nelems): # test that using CudaAsyncMemoryResource @@ -697,10 +701,6 @@ def test_cuda_async_memory_resource_stream(nelems): np.testing.assert_equal(expected, result) -@pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", -) @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) def test_cuda_async_memory_resource_threshold(nelem, alloc): @@ -717,13 +717,7 @@ def test_cuda_async_memory_resource_threshold(nelem, alloc): "mr", [ rmm.mr.CudaMemoryResource, - pytest.param( - rmm.mr.CudaAsyncMemoryResource, - marks=pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", - ), - ), + pytest.param(rmm.mr.CudaAsyncMemoryResource), ], ) def test_limiting_resource_adaptor(mr): @@ -801,10 +795,28 @@ def callback(nbytes: int) -> bool: rmm.mr.set_current_device_resource(mr) with pytest.raises(MemoryError): - rmm.DeviceBuffer(size=int(1e11)) + from rmm.mr import available_device_memory + + total_memory = available_device_memory()[1] + rmm.DeviceBuffer(size=total_memory * 2) assert retried[0] +def test_failure_callback_resource_adaptor_error(): + def callback(nbytes: int) -> bool: + raise RuntimeError("MyError") + + cuda_mr = rmm.mr.CudaMemoryResource() + mr = rmm.mr.FailureCallbackResourceAdaptor(cuda_mr, callback) + rmm.mr.set_current_device_resource(mr) + + with pytest.raises(RuntimeError, match="MyError"): + from rmm.mr import available_device_memory + + total_memory = available_device_memory()[1] + rmm.DeviceBuffer(size=total_memory * 2) + + @pytest.mark.parametrize("managed", [True, False]) def test_prefetch_resource_adaptor(managed): if managed: @@ -829,18 +841,6 @@ def test_prefetch_resource_adaptor(managed): assert_prefetched(db, device) -def test_failure_callback_resource_adaptor_error(): - def callback(nbytes: int) -> bool: - raise RuntimeError("MyError") - - cuda_mr = rmm.mr.CudaMemoryResource() - mr = rmm.mr.FailureCallbackResourceAdaptor(cuda_mr, callback) - rmm.mr.set_current_device_resource(mr) - - with pytest.raises(RuntimeError, match="MyError"): - rmm.DeviceBuffer(size=int(1e11)) - - def test_dev_buf_circle_ref_dealloc(): # This test creates a reference cycle containing a `DeviceBuffer` # and ensures that the garbage collector does not clear it, i.e., @@ -1076,3 +1076,9 @@ def test_available_device_memory(): assert initial_memory[1] == final_memory[1] assert initial_memory[0] > 0 assert final_memory[0] > 0 + + +# TODO: Remove test when rmm._lib is removed in 25.02 +def test_deprecate_rmm_lib(): + with pytest.warns(FutureWarning): + rmm._lib.device_buffer.DeviceBuffer(size=100) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ea1af58cd..476028af0 100644 --- a/tests/CMakeLists.txt +++ 
b/tests/CMakeLists.txt
@@ -19,13 +19,16 @@ option(CODE_COVERAGE "Enable generating code coverage with gcov." OFF)
 
 include(rapids-test)
 rapids_test_init()
 
+# Ensure tests are using the conda env, so they have the correct release/debug compile flags
+rapids_cmake_support_conda_env(conda_env)
+
 # This function takes in a test name and test source and handles setting all of the associated
 # properties and linking to build the test
 function(ConfigureTestInternal TEST_NAME)
   add_executable(${TEST_NAME} ${ARGN})
   target_include_directories(${TEST_NAME} PRIVATE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>")
   target_link_libraries(${TEST_NAME} GTest::gmock GTest::gtest GTest::gmock_main GTest::gtest_main
-                        pthread rmm)
+                        pthread rmm $<TARGET_NAME_IF_EXISTS:conda_env>)
   set_target_properties(
     ${TEST_NAME}
     PROPERTIES POSITION_INDEPENDENT_CODE ON
@@ -38,9 +41,7 @@ function(ConfigureTestInternal TEST_NAME)
              CUDA_STANDARD_REQUIRED ON)
 
   target_compile_definitions(${TEST_NAME} PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}")
-  target_compile_options(${TEST_NAME} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-Wall -Werror
-                                             -Wno-error=deprecated-declarations>)
-  target_compile_options(${TEST_NAME} PUBLIC "$<$<CONFIG:Debug>:-O0>")
+  target_compile_options(${TEST_NAME} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-Wall -Werror>)
 
   if(DISABLE_DEPRECATION_WARNING)
     target_compile_options(
@@ -83,7 +84,7 @@ endfunction()
 
 function(ConfigureTest TEST_NAME)
   set(options)
-  set(one_value GPUS PERCENT)
+  set(one_value CUDART GPUS PERCENT)
   set(multi_value)
   cmake_parse_arguments(_RMM_TEST "${options}" "${one_value}" "${multi_value}" ${ARGN})
   if(NOT DEFINED _RMM_TEST_GPUS AND NOT DEFINED _RMM_TEST_PERCENT)
@@ -97,13 +98,23 @@ function(ConfigureTest TEST_NAME)
     set(_RMM_TEST_PERCENT 100)
   endif()
 
+  if(_RMM_TEST_CUDART STREQUAL SHARED)
+    set(cudart_link_libs $<COMPILE_ONLY:rmm> CUDA::cudart)
+  elseif(_RMM_TEST_CUDART STREQUAL STATIC)
+    set(cudart_link_libs $<COMPILE_ONLY:rmm> CUDA::cudart_static)
+  else()
+    set(cudart_link_libs rmm)
+  endif()
+
   # Test with legacy default stream.
   ConfigureTestInternal(${TEST_NAME} ${_RMM_TEST_UNPARSED_ARGUMENTS})
+  target_link_libraries(${TEST_NAME} ${cudart_link_libs})
 
   # Test with per-thread default stream.
   string(REGEX REPLACE "_TEST$" "_PTDS_TEST" PTDS_TEST_NAME "${TEST_NAME}")
   ConfigureTestInternal("${PTDS_TEST_NAME}" ${_RMM_TEST_UNPARSED_ARGUMENTS})
   target_compile_definitions("${PTDS_TEST_NAME}" PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM)
+  target_link_libraries(${PTDS_TEST_NAME} ${cudart_link_libs})
 
   foreach(name ${TEST_NAME} ${PTDS_TEST_NAME} ${NS_TEST_NAME})
     rapids_test_add(
@@ -129,7 +140,10 @@ ConfigureTest(ADAPTOR_TEST mr/device/adaptor_tests.cpp)
 ConfigureTest(POOL_MR_TEST mr/device/pool_mr_tests.cpp GPUS 1 PERCENT 100)
 
 # cuda_async mr tests
-ConfigureTest(CUDA_ASYNC_MR_TEST mr/device/cuda_async_mr_tests.cpp GPUS 1 PERCENT 60)
+ConfigureTest(CUDA_ASYNC_MR_STATIC_CUDART_TEST mr/device/cuda_async_mr_tests.cpp GPUS 1 PERCENT 60
+              CUDART STATIC)
+ConfigureTest(CUDA_ASYNC_MR_SHARED_CUDART_TEST mr/device/cuda_async_mr_tests.cpp GPUS 1 PERCENT 60
+              CUDART SHARED)
 
 # thrust allocator tests
 ConfigureTest(THRUST_ALLOCATOR_TEST mr/device/thrust_allocator_tests.cu GPUS 1 PERCENT 60)
diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp
index 643281d91..8a5d37be2 100644
--- a/tests/logger_tests.cpp
+++ b/tests/logger_tests.cpp
@@ -112,7 +112,6 @@ void expect_log_events(std::string const& filename,
     // EXPECT_EQ(expected.thread_id, actual.thread_id);
     // EXPECT_EQ(expected.stream, actual.stream);
     EXPECT_EQ(expected.act, actual.act);
-    // device_memory_resource automatically pads an allocation to a multiple of 8 bytes
     EXPECT_EQ(expected.size, actual.size);
     EXPECT_EQ(expected.pointer, actual.pointer);
     return true;
diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp
index bdc0f2438..67f183a23 100644
--- a/tests/mr/device/arena_mr_tests.cpp
+++ b/tests/mr/device/arena_mr_tests.cpp
@@ -491,9 +491,7 @@ TEST_F(ArenaTest, SizeSmallerThanSuperblockSize)  // NOLINT
 TEST_F(ArenaTest, AllocateNinetyPercent)  // NOLINT
 {
   EXPECT_NO_THROW([]() {  // NOLINT(cppcoreguidelines-avoid-goto)
-    auto const free = rmm::available_device_memory().first;
-    auto const ninety_percent = rmm::align_up(
-      static_cast<std::size_t>(static_cast<double>(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT);
+    auto const ninety_percent = rmm::percent_of_free_device_memory(90);
     arena_mr mr(rmm::mr::get_current_device_resource_ref(), ninety_percent);
   }());
 }
@@ -576,10 +574,10 @@ TEST_F(ArenaTest, DumpLogOnFailure)  // NOLINT
   std::size_t num_threads{4};
   threads.reserve(num_threads);
   for (std::size_t i = 0; i < num_threads; ++i) {
-    threads.emplace_back(std::thread([&] {
+    threads.emplace_back([&] {
       void* ptr = mr.allocate(32_KiB);
       mr.deallocate(ptr, 32_KiB);
-    }));
+    });
   }
 
   for (auto& thread : threads) {
diff --git a/tests/mr/device/callback_mr_tests.cpp b/tests/mr/device/callback_mr_tests.cpp
index a56efa60c..a7f6ab7be 100644
--- a/tests/mr/device/callback_mr_tests.cpp
+++ b/tests/mr/device/callback_mr_tests.cpp
@@ -23,11 +23,11 @@
 #include
 #include
-#include <fmt/core.h>
 #include
 #include
 #include
+#include <string>
 
 namespace rmm::test {
 namespace {
@@ -78,8 +78,9 @@ TEST(CallbackTest, LoggingTest)
   auto* ptr = mr.allocate(size);
   mr.deallocate(ptr, size);
 
-  std::string output = testing::internal::GetCapturedStdout();
-  std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", size, size);
+  auto output = testing::internal::GetCapturedStdout();
+  auto expect = std::string("Allocating ") + std::to_string(size) + " bytes\nDeallocating " +
+                std::to_string(size) + " bytes\n";
   ASSERT_EQ(expect, output);
 }
diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp
index 90c7b0ff9..a39188548 100644
--- a/tests/mr/device/cuda_async_mr_tests.cpp
+++ b/tests/mr/device/cuda_async_mr_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,24 +31,13 @@ class AsyncMRTest : public ::testing::Test {
  protected:
  void SetUp() override
  {
-    if (!rmm::detail::async_alloc::is_supported()) {
+    if (!rmm::detail::runtime_async_alloc::is_supported()) {
       GTEST_SKIP() << "Skipping tests since cudaMallocAsync not supported with this CUDA "
                    << "driver/runtime version";
     }
   }
 };
 
-TEST_F(AsyncMRTest, ThrowIfNotSupported)
-{
-  auto construct_mr = []() { cuda_async_mr mr; };
-#ifndef RMM_CUDA_MALLOC_ASYNC_SUPPORT
-  EXPECT_THROW(construct_mr(), rmm::logic_error);
-#else
-  EXPECT_NO_THROW(construct_mr());
-#endif
-}
-
-#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT)
 TEST_F(AsyncMRTest, ExplicitInitialPoolSize)
 {
   const auto pool_init_size{100};
@@ -77,7 +66,5 @@ TEST_F(AsyncMRTest, DifferentPoolsUnequal)
   EXPECT_FALSE(mr1.is_equal(mr2));
 }
 
-#endif
-
 }  // namespace
 }  // namespace rmm::test
diff --git a/tests/mr/device/cuda_async_view_mr_tests.cpp b/tests/mr/device/cuda_async_view_mr_tests.cpp
index fe82431a9..f3a02cbf0 100644
--- a/tests/mr/device/cuda_async_view_mr_tests.cpp
+++ b/tests/mr/device/cuda_async_view_mr_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,13 +29,10 @@ using cuda_async_view_mr = rmm::mr::cuda_async_view_memory_resource;
 static_assert(cuda::mr::resource_with<cuda_async_view_mr, cuda::mr::device_accessible>);
 static_assert(cuda::mr::async_resource_with<cuda_async_view_mr, cuda::mr::device_accessible>);
 
-#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT)
-
 TEST(PoolTest, UsePool)
 {
   cudaMemPool_t memPool{};
-  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaDeviceGetDefaultMemPool(
-    &memPool, rmm::get_current_cuda_device().value()));
+  RMM_CUDA_TRY(cudaDeviceGetDefaultMemPool(&memPool, rmm::get_current_cuda_device().value()));
 
   const auto pool_init_size{100};
   cuda_async_view_mr mr{memPool};
@@ -53,7 +50,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool)
 
   cudaMemPool_t memPool{};
 
-  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&memPool, &poolProps));
+  RMM_CUDA_TRY(cudaMemPoolCreate(&memPool, &poolProps));
 
   {
     const auto pool_init_size{100};
@@ -64,7 +61,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool)
   }
 
   auto destroy_valid_pool = [&]() {
-    auto result = rmm::detail::async_alloc::cudaMemPoolDestroy(memPool);
+    auto result = cudaMemPoolDestroy(memPool);
     RMM_EXPECTS(result == cudaSuccess, "Pool wrapper did destroy pool");
   };
 
@@ -81,7 +78,5 @@ TEST(PoolTest, ThrowIfNullptrPool)
   EXPECT_THROW(construct_mr(), rmm::logic_error);
 }
 
-#endif
-
 }  // namespace
 }  // namespace rmm::test
diff --git a/tests/mr/device/mr_ref_multithreaded_tests.cpp b/tests/mr/device/mr_ref_multithreaded_tests.cpp
index 7d749efd1..9e7c8c2e8 100644
--- a/tests/mr/device/mr_ref_multithreaded_tests.cpp
+++ b/tests/mr/device/mr_ref_multithreaded_tests.cpp
@@ -36,17 +36,11 @@ namespace {
 
 struct mr_ref_test_mt : public mr_ref_test {};
 
-INSTANTIATE_TEST_CASE_P(MultiThreadResourceTests,
-                        mr_ref_test_mt,
-                        ::testing::Values("CUDA",
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
-                                          "CUDA_Async",
-#endif
-                                          "Managed",
-                                          "Pool",
-                                          "Arena",
-                                          "Binning"),
-                        [](auto const& info) { return info.param; });
+INSTANTIATE_TEST_CASE_P(
+  MultiThreadResourceTests,
diff --git a/tests/mr/device/cuda_async_view_mr_tests.cpp b/tests/mr/device/cuda_async_view_mr_tests.cpp
index fe82431a9..f3a02cbf0 100644
--- a/tests/mr/device/cuda_async_view_mr_tests.cpp
+++ b/tests/mr/device/cuda_async_view_mr_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,13 +29,10 @@ using cuda_async_view_mr = rmm::mr::cuda_async_view_memory_resource;
 static_assert(cuda::mr::resource_with<cuda_async_view_mr, cuda::mr::device_accessible>);
 static_assert(cuda::mr::async_resource_with<cuda_async_view_mr, cuda::mr::device_accessible>);
 
-#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT)
-
 TEST(PoolTest, UsePool)
 {
   cudaMemPool_t memPool{};
-  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaDeviceGetDefaultMemPool(
-    &memPool, rmm::get_current_cuda_device().value()));
+  RMM_CUDA_TRY(cudaDeviceGetDefaultMemPool(&memPool, rmm::get_current_cuda_device().value()));
 
   const auto pool_init_size{100};
   cuda_async_view_mr mr{memPool};
@@ -53,7 +50,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool)
 
   cudaMemPool_t memPool{};
-  RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&memPool, &poolProps));
+  RMM_CUDA_TRY(cudaMemPoolCreate(&memPool, &poolProps));
 
   {
     const auto pool_init_size{100};
@@ -64,7 +61,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool)
   }
 
   auto destroy_valid_pool = [&]() {
-    auto result = rmm::detail::async_alloc::cudaMemPoolDestroy(memPool);
+    auto result = cudaMemPoolDestroy(memPool);
     RMM_EXPECTS(result == cudaSuccess, "Pool wrapper did destroy pool");
   };
 
@@ -81,7 +78,5 @@ TEST(PoolTest, ThrowIfNullptrPool)
   EXPECT_THROW(construct_mr(), rmm::logic_error);
 }
 
-#endif
-
 }  // namespace
 }  // namespace rmm::test
diff --git a/tests/mr/device/mr_ref_multithreaded_tests.cpp b/tests/mr/device/mr_ref_multithreaded_tests.cpp
index 7d749efd1..9e7c8c2e8 100644
--- a/tests/mr/device/mr_ref_multithreaded_tests.cpp
+++ b/tests/mr/device/mr_ref_multithreaded_tests.cpp
@@ -36,17 +36,11 @@ namespace {
 
 struct mr_ref_test_mt : public mr_ref_test {};
 
-INSTANTIATE_TEST_CASE_P(MultiThreadResourceTests,
-                        mr_ref_test_mt,
-                        ::testing::Values("CUDA",
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
-                                          "CUDA_Async",
-#endif
-                                          "Managed",
-                                          "Pool",
-                                          "Arena",
-                                          "Binning"),
-                        [](auto const& info) { return info.param; });
+INSTANTIATE_TEST_CASE_P(
+  MultiThreadResourceTests,
+  mr_ref_test_mt,
+  ::testing::Values("CUDA", "CUDA_Async", "Managed", "Pool", "Arena", "Binning"),
+  [](auto const& info) { return info.param; });
 
 template <typename Task, typename... Arguments>
 void spawn_n(std::size_t num_threads, Task task, Arguments&&... args)
@@ -109,8 +103,13 @@ TEST_P(mr_ref_test_mt, SetCurrentDeviceResourceRef_mt)
 {
   // single thread changes default resource, then multiple threads use it
   auto old = rmm::mr::set_current_device_resource_ref(this->ref);
+  test_get_current_device_resource_ref();
 
-  spawn([mr = this->ref]() {
+  int device;
+  RMM_CUDA_TRY(cudaGetDevice(&device));
+
+  spawn([device, mr = this->ref]() {
+    RMM_CUDA_TRY(cudaSetDevice(device));
     EXPECT_EQ(mr, rmm::mr::get_current_device_resource_ref());
     test_get_current_device_resource_ref();  // test allocating with the new default resource
   });
@@ -156,7 +155,17 @@ TEST_P(mr_ref_test_mt, SetCurrentDeviceResourceRefPerThread_mt)
   }
 }
 
-TEST_P(mr_ref_test_mt, Allocate) { spawn(test_various_allocations, this->ref); }
+TEST_P(mr_ref_test_mt, Allocate)
+{
+  int device;
+  RMM_CUDA_TRY(cudaGetDevice(&device));
+
+  auto mr = this->ref;
+  spawn([device, mr]() {
+    RMM_CUDA_TRY(cudaSetDevice(device));
+    test_various_allocations(mr);
+  });
+}
 
 TEST_P(mr_ref_test_mt, AllocateDefaultStream)
 {
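The `cudaGetDevice`/`cudaSetDevice` pairs added to the spawned lambdas above address a subtlety: the active CUDA device is per-thread state, so a freshly created worker thread starts on device 0 regardless of what the launching thread had selected. A minimal sketch of the pattern with a hypothetical helper (the tests' own `spawn` utility differs):

#include <cuda_runtime_api.h>
#include <thread>

// Run `work` on a fresh thread, first propagating the caller's active device.
// Each host thread carries its own current-device setting, initially device 0.
template <typename Work>
void run_on_same_device(Work work)
{
  int device = 0;
  cudaGetDevice(&device);   // capture the launching thread's device
  std::thread thread([device, work]() {
    cudaSetDevice(device);  // without this, the worker would use device 0
    work();
  });
  thread.join();
}

Without the `cudaSetDevice` call, a multi-GPU machine could end up allocating from a per-device resource while a different device is current, which is the failure mode the updated tests guard against.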
diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp
index 6e63b3838..2af0eff44 100644
--- a/tests/mr/device/mr_ref_test.hpp
+++ b/tests/mr/device/mr_ref_test.hpp
@@ -347,7 +347,7 @@ inline auto make_host_pinned() { return std::make_shared<rmm::mr::pinned_host_memory_resource>(); }
 
 inline auto make_cuda_async()
 {
-  if (rmm::detail::async_alloc::is_supported()) {
+  if (rmm::detail::runtime_async_alloc::is_supported()) {
     return std::make_shared<rmm::mr::cuda_async_memory_resource>();
   }
   return std::shared_ptr<rmm::mr::cuda_async_memory_resource>{nullptr};
diff --git a/tests/mr/device/mr_ref_tests.cpp b/tests/mr/device/mr_ref_tests.cpp
index 55e91d765..41af050a0 100644
--- a/tests/mr/device/mr_ref_tests.cpp
+++ b/tests/mr/device/mr_ref_tests.cpp
@@ -30,9 +30,7 @@ namespace {
 INSTANTIATE_TEST_SUITE_P(ResourceTests,
                          mr_ref_test,
                          ::testing::Values("CUDA",
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
                                            "CUDA_Async",
-#endif
                                            "Managed",
                                            "System",
                                            "Pool",
@@ -46,9 +44,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceAllocationTests,
                          mr_ref_allocation_test,
                          ::testing::Values("CUDA",
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
                                            "CUDA_Async",
-#endif
                                            "Managed",
                                            "System"
                                            "Pool",
diff --git a/tests/mr/device/test_utils.hpp b/tests/mr/device/test_utils.hpp
index 2b9513793..5b7ef197b 100644
--- a/tests/mr/device/test_utils.hpp
+++ b/tests/mr/device/test_utils.hpp
@@ -31,17 +31,14 @@ inline bool is_device_accessible_memory(void* ptr)
 {
   cudaPointerAttributes attributes{};
   if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
-  return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or
-         ((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr)) or
-         ((attributes.type == cudaMemoryTypeUnregistered) and
-          (rmm::mr::detail::is_system_memory_supported(rmm::get_current_cuda_device())));
+  return attributes.devicePointer != nullptr;
 }
 
 inline bool is_host_memory(void* ptr)
 {
   cudaPointerAttributes attributes{};
   if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
-  return attributes.type == cudaMemoryTypeHost;
+  return attributes.hostPointer != nullptr || attributes.type == cudaMemoryTypeUnregistered;
 }
 
 inline bool is_properly_aligned(void* ptr)
diff --git a/tests/mr/device/thrust_allocator_tests.cu b/tests/mr/device/thrust_allocator_tests.cu
index 84f599957..46447aa09 100644
--- a/tests/mr/device/thrust_allocator_tests.cu
+++ b/tests/mr/device/thrust_allocator_tests.cu
@@ -69,17 +69,11 @@ TEST_P(allocator_test, multi_device)
   }());
 }
 
-INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests,
-                        allocator_test,
-                        ::testing::Values("CUDA",
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
-                                          "CUDA_Async",
-#endif
-                                          "Managed",
-                                          "Pool",
-                                          "Arena",
-                                          "Binning"),
-                        [](auto const& info) { return info.param; });
+INSTANTIATE_TEST_CASE_P(
+  ThrustAllocatorTests,
+  allocator_test,
+  ::testing::Values("CUDA", "CUDA_Async", "Managed", "Pool", "Arena", "Binning"),
+  [](auto const& info) { return info.param; });
 
 }  // namespace
 }  // namespace rmm::test
diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp
index acd540ae6..3fce55fb8 100644
--- a/tests/mr/device/tracking_mr_tests.cpp
+++ b/tests/mr/device/tracking_mr_tests.cpp
@@ -204,8 +204,8 @@ TEST(TrackingTest, LogOutstandingAllocations)
 {
   std::ostringstream oss;
   auto oss_sink = std::make_shared<spdlog::sinks::ostream_sink_mt>(oss);
-  rmm::logger().sinks().push_back(oss_sink);
-  auto old_level = rmm::logger().level();
+  rmm::detail::logger().sinks().push_back(oss_sink);
+  auto old_level = rmm::detail::logger().level();
 
   tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
@@ -213,7 +213,7 @@ TEST(TrackingTest, LogOutstandingAllocations)
     allocations.push_back(mr.allocate(ten_MiB));
   }
 
-  rmm::logger().set_level(spdlog::level::debug);
+  rmm::detail::logger().set_level(spdlog::level::debug);
   EXPECT_NO_THROW(mr.log_outstanding_allocations());
 
 #if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG
@@ -224,8 +224,8 @@ TEST(TrackingTest, LogOutstandingAllocations)
     mr.deallocate(allocation, ten_MiB);
   }
 
-  rmm::logger().set_level(old_level);
-  rmm::logger().sinks().pop_back();
+  rmm::detail::logger().set_level(old_level);
+  rmm::detail::logger().sinks().pop_back();
 }
 
 }  // namespace
diff --git a/tests/prefetch_tests.cpp b/tests/prefetch_tests.cpp
index 6c7bb2dd3..4a2c41a2b 100644
--- a/tests/prefetch_tests.cpp
+++ b/tests/prefetch_tests.cpp
@@ -53,8 +53,8 @@ struct PrefetchTest : public ::testing::Test {
     // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1g8048f6ea5ad77917444567656c140c5a
     // specifically for when cudaMemRangeAttribute::cudaMemRangeAttributeLastPrefetchLocation is
     // used.
-    constexpr size_t prefetch_data_size = 4;
     if constexpr (std::is_same_v<MemoryResourceType, rmm::mr::managed_memory_resource>) {
+      constexpr size_t prefetch_data_size = 4;
       int prefetch_location{0};
       RMM_CUDA_TRY(
         cudaMemRangeGetAttribute(&prefetch_location,