Re-organize UVM tests (pytorch#2292)
Summary:
Pull Request resolved: pytorch#2292

- Re-organize UVM tests

Reviewed By: spcyppt

Differential Revision: D53151319

fbshipit-source-id: a0c77c6432ac0c66593fb102176d6255a81c2a87
q10 authored and facebook-github-bot committed Jan 29, 2024
1 parent 677ad39 commit 7caf97e
Showing 4 changed files with 159 additions and 120 deletions.
5 changes: 3 additions & 2 deletions .github/scripts/fbgemm_gpu_test.bash
@@ -88,9 +88,10 @@ run_fbgemm_gpu_tests () {
   )

   if [ "$fbgemm_variant" == "cpu" ]; then
-    # These are tests that are currently broken in FBGEMM_GPU-CPU
+    # These tests have non-CPU operators referenced in @given
     local ignored_tests=(
-      ./uvm_test.py
+      ./uvm/copy_test.py
+      ./uvm/uvm_test.py
     )
   elif [ "$fbgemm_variant" == "rocm" ]; then
     local ignored_tests=(
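Note on the new ignore entries above: the arguments to Hypothesis' @given are evaluated when the test module is imported (at collection time), so a strategy that references a CUDA-only operator can fail on a CPU-only build before any skip decorator runs, which is presumably why the two uvm/ test files stay on the CPU ignore list. A minimal Python sketch of that pattern follows; it is illustrative only and not part of this commit, the class and test names are hypothetical, and it assumes the CPU-only fbgemm_gpu build does not register new_managed_tensor:

# Illustrative sketch only, mirroring the pattern used by the ignored tests.
import unittest

import fbgemm_gpu  # noqa: F401  # loading the extension registers torch.ops.fbgemm.*
import hypothesis.strategies as st
import torch
from hypothesis import given


class NonCpuOpInGivenExample(unittest.TestCase):
    @unittest.skipIf(not torch.cuda.is_available(), "requires a GPU")
    @given(
        # Built when the decorator is applied, i.e. at import time, not when the
        # test runs; this operator lookup is what can break CPU-only collection.
        uvm_op=st.sampled_from([torch.ops.fbgemm.new_managed_tensor]),
    )
    def test_example(self, uvm_op) -> None:
        pass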
@@ -45,7 +45,7 @@ std::pair<at::Tensor, at::Tensor> run_emulate_cache_miss(
   return {lxu_cache_location_with_cache_misses.cpu(), uvm_cache_stats.cpu()};
 }

-TEST(uvm_cache_miss_emulate_test, no_cache_miss) {
+TEST(UvmCacheMissEmulateTest, no_cache_miss) {
   constexpr int64_t num_requests = 10000;
   constexpr int64_t num_sets = 32768;
   constexpr int64_t associativity = 32;
@@ -60,7 +60,7 @@ TEST(uvm_cache_miss_emulate_test, no_cache_miss) {
       at::equal(lxu_cache_locations_cpu, lxu_cache_location_with_cache_misses));
 }

-TEST(uvm_cache_miss_emulate_test, enforced_cache_miss) {
+TEST(UvmCacheMissEmulateTest, enforced_cache_miss) {
   constexpr int64_t num_requests = 10000;
   constexpr int64_t num_sets = 32768;
   constexpr int64_t associativity = 32;
154 changes: 154 additions & 0 deletions fbgemm_gpu/test/uvm/copy_test.py
@@ -0,0 +1,154 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-ignore-all-errors[56]

import unittest
from typing import List

import fbgemm_gpu
import hypothesis.strategies as st
import torch
from hypothesis import given, settings, Verbosity

# pyre-fixme[16]: Module `fbgemm_gpu` has no attribute `open_source`.
open_source: bool = getattr(fbgemm_gpu, "open_source", False)

if open_source:
    # pyre-ignore[21]
    from test_utils import gpu_available, gpu_unavailable, skipIfRocm
else:
    from fbgemm_gpu.test.test_utils import gpu_available, gpu_unavailable, skipIfRocm

if gpu_available:
    # pyre-ignore[21]
    from fbgemm_gpu.uvm import cudaMemAdvise, cudaMemoryAdvise, cudaMemPrefetchAsync

[GitHub Actions annotation, run-lint (3.11), on line 29 of fbgemm_gpu/test/uvm/copy_test.py: F401 'fbgemm_gpu.uvm.cudaMemAdvise', 'fbgemm_gpu.uvm.cudaMemoryAdvise', and 'fbgemm_gpu.uvm.cudaMemPrefetchAsync' imported but unused.]

MAX_EXAMPLES = 40


class CopyTest(unittest.TestCase):
    @unittest.skipIf(*gpu_unavailable)
    @given(
        sizes=st.lists(st.integers(min_value=1, max_value=8), min_size=1, max_size=4),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_cpu(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        cpu_t = torch.ops.fbgemm.uvm_to_cpu(uvm_t)
        assert not torch.ops.fbgemm.is_uvm_tensor(cpu_t)
        assert torch.ops.fbgemm.uvm_storage(cpu_t)

        uvm_t.copy_(cpu_t)
        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        # Test use of cpu tensor after freeing the uvm tensor
        del uvm_t
        cpu_t.mul_(42)

    @skipIfRocm()
    @unittest.skipIf(
        not torch.cuda.is_available() or torch.cuda.device_count() < 2,
        "Skip unless two CUDA devices are detected",
    )
    @given(
        sizes=st.lists(
            st.integers(min_value=1, max_value=(1024)), min_size=1, max_size=4
        ),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_device(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        # Reference uvm tensor from second cuda device
        try:
            device_prototype = torch.empty(0, device="cuda:1")
        except RuntimeError:
            # Skip the tests if there is no "cuda:1" device
            return

        second_t = torch.ops.fbgemm.uvm_to_device(uvm_t, device_prototype)

        assert torch.ops.fbgemm.is_uvm_tensor(second_t)
        assert torch.ops.fbgemm.uvm_storage(second_t)
        assert second_t.device == device_prototype.device

    @unittest.skipIf(*gpu_unavailable)
    @given(
        sizes=st.lists(
            st.integers(min_value=1, max_value=(512)), min_size=1, max_size=3
        ),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_cpu_clone(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        cpu_clone = torch.ops.fbgemm.uvm_to_cpu_clone(uvm_t)

        assert not torch.ops.fbgemm.is_uvm_tensor(cpu_clone)
        assert not torch.ops.fbgemm.uvm_storage(cpu_clone)


if __name__ == "__main__":
    unittest.main()
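For reference, the relocated tests above pin down the contrast between uvm_to_cpu and uvm_to_cpu_clone. A minimal sketch of that contrast follows; it is illustrative only and not part of the commit, and it assumes a CUDA device and an fbgemm_gpu build with the UVM operators registered:

import fbgemm_gpu  # noqa: F401  # loading the extension registers torch.ops.fbgemm.*
import torch

# Allocate a small UVM-backed tensor, following the pattern used in the tests.
uvm_t = torch.ops.fbgemm.new_managed_tensor(
    torch.empty(0, device="cuda:0", dtype=torch.float), [4, 4]
)

# uvm_to_cpu: a CPU-visible tensor that is still backed by the UVM storage.
cpu_t = torch.ops.fbgemm.uvm_to_cpu(uvm_t)
assert not torch.ops.fbgemm.is_uvm_tensor(cpu_t)
assert torch.ops.fbgemm.uvm_storage(cpu_t)

# uvm_to_cpu_clone: an independent copy on ordinary CPU storage.
cpu_clone = torch.ops.fbgemm.uvm_to_cpu_clone(uvm_t)
assert not torch.ops.fbgemm.is_uvm_tensor(cpu_clone)
assert not torch.ops.fbgemm.uvm_storage(cpu_clone)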
116 changes: 0 additions & 116 deletions fbgemm_gpu/test/uvm_test.py → fbgemm_gpu/test/uvm/uvm_test.py
@@ -61,42 +61,6 @@ def test_is_uvm_tensor(self, sizes: List[int], uvm_op) -> None:
        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

    @unittest.skipIf(*gpu_unavailable)
    @given(
        sizes=st.lists(st.integers(min_value=1, max_value=8), min_size=1, max_size=4),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_cpu(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        cpu_t = torch.ops.fbgemm.uvm_to_cpu(uvm_t)
        assert not torch.ops.fbgemm.is_uvm_tensor(cpu_t)
        assert torch.ops.fbgemm.uvm_storage(cpu_t)

        uvm_t.copy_(cpu_t)
        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        # Test use of cpu tensor after freeing the uvm tensor
        del uvm_t
        cpu_t.mul_(42)

    @unittest.skipIf(*gpu_unavailable)
    def test_enum(self) -> None:
        # pyre-ignore[16]
@@ -168,52 +132,6 @@ def test_cudaMemPrefetchAsync(self, sizes: List[int], uvm_op) -> None:

        torch.cuda.synchronize(torch.device("cuda:0"))

    @skipIfRocm()
    @unittest.skipIf(
        not torch.cuda.is_available() or torch.cuda.device_count() < 2,
        "Skip unless two CUDA devices are detected",
    )
    @given(
        sizes=st.lists(
            st.integers(min_value=1, max_value=(1024)), min_size=1, max_size=4
        ),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_device(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        # Reference uvm tensor from second cuda device
        try:
            device_prototype = torch.empty(0, device="cuda:1")
        except RuntimeError:
            # Skip the tests if there is no "cuda:1" device
            return

        second_t = torch.ops.fbgemm.uvm_to_device(uvm_t, device_prototype)

        assert torch.ops.fbgemm.is_uvm_tensor(second_t)
        assert torch.ops.fbgemm.uvm_storage(second_t)
        assert second_t.device == device_prototype.device

    @skipIfRocm()
    @unittest.skipIf(*gpu_unavailable)
    @given(
@@ -289,40 +207,6 @@ def test_uvm_memadviceDontFork(self, sizes: List[int], uvm_op) -> None:

        torch.ops.fbgemm.uvm_mem_advice_dont_fork(cpu_t)

    @unittest.skipIf(*gpu_unavailable)
    @given(
        sizes=st.lists(
            st.integers(min_value=1, max_value=(512)), min_size=1, max_size=3
        ),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_cpu_clone(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        cpu_clone = torch.ops.fbgemm.uvm_to_cpu_clone(uvm_t)

        assert not torch.ops.fbgemm.is_uvm_tensor(cpu_clone)
        assert not torch.ops.fbgemm.uvm_storage(cpu_clone)

    @unittest.skipIf(*gpu_unavailable)
    @given(
        sizes=st.lists(