Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check context is valid before freeing streams, arrays. #552

Merged
merged 2 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@

# Passed to `DataRef` to handle freeing.
function _free_buf(buf, stream_ordered::Bool)
s = stream_ordered ? AMDGPU.stream() : AMDGPU.default_stream()
Mem.free(buf; stream=s)
context!(buf.ctx) do
s = stream_ordered ? AMDGPU.stream() : AMDGPU.default_stream()
Mem.free(buf; stream=s)

Check warning on line 30 in src/array.jl

View check run for this annotation

Codecov / codecov/patch

src/array.jl#L28-L30

Added lines #L28 - L30 were not covered by tests
end
end

unsafe_free!(x::ROCArray) = GPUArrays.unsafe_free!(x.buf, true)
Expand Down
1 change: 1 addition & 0 deletions src/fft/fft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ function unsafe_execute!(
plan::cROCFFTPlan{T,K,false,N}, X::ROCArray{T,N}, Y::ROCArray{T},
) where {T,N,K}
X = copy(X) # since input array can also be modified
# TODO on 1.11 we need to manually cast `pointer(X)` to `Ptr{Cvoid}`.
rocfft_execute(plan, [pointer(X),], [pointer(Y),], plan.execution_info)
end

Expand Down
10 changes: 4 additions & 6 deletions src/hip/HIP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

mutable struct HIPContext
context::hipContext_t
valid::Bool
end
const CONTEXTS = AMDGPU.LockedObject(Dict{HIPDevice,HIPContext}())

Expand All @@ -31,10 +32,11 @@
get!(contexts, device) do
context_ref = Ref{hipContext_t}()
hipCtxCreate(context_ref, Cuint(0), device.device) |> check
context = HIPContext(context_ref[])
context = HIPContext(context_ref[], true)

Check warning on line 35 in src/hip/HIP.jl

View check run for this annotation

Codecov / codecov/patch

src/hip/HIP.jl#L35

Added line #L35 was not covered by tests

device!(device)
finalizer(context) do c
c.valid = false

Check warning on line 39 in src/hip/HIP.jl

View check run for this annotation

Codecov / codecov/patch

src/hip/HIP.jl#L39

Added line #L39 was not covered by tests
hipCtxDestroy(c.context) |> check
end
return context
Expand All @@ -44,11 +46,7 @@

HIPContext(device_id::Integer) = HIPContext(HIPDevice(device_id))

function HIPContext()
context_ref = Ref{hipContext_t}()
hipCtxGetCurrent(context_ref) |> check
HIPContext(context_ref[])
end
HIPContext() = HIPContext(device())

Check warning on line 49 in src/hip/HIP.jl

View check run for this annotation

Codecov / codecov/patch

src/hip/HIP.jl#L49

Added line #L49 was not covered by tests

Base.unsafe_convert(::Type{Ptr{T}}, context::HIPContext) where T =
reinterpret(Ptr{T}, context.context)
Expand Down
15 changes: 11 additions & 4 deletions src/hip/stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
stream::hipStream_t
priority::Symbol
device::HIPDevice
ctx::HIPContext
end

"""
Expand All @@ -22,22 +23,28 @@

stream_ref = Ref{hipStream_t}()
hipStreamCreateWithPriority(stream_ref, Cuint(0), priority_int) |> check
stream = HIPStream(stream_ref[], priority, device())
d = device()
stream = HIPStream(stream_ref[], priority, d, HIPContext(d))

Check warning on line 27 in src/hip/stream.jl

View check run for this annotation

Codecov / codecov/patch

src/hip/stream.jl#L26-L27

Added lines #L26 - L27 were not covered by tests
finalizer(stream) do s
hipStreamDestroy(s.stream) |> check
AMDGPU.context!(s.ctx) do
hipStreamDestroy(s.stream) |> check

Check warning on line 30 in src/hip/stream.jl

View check run for this annotation

Codecov / codecov/patch

src/hip/stream.jl#L29-L30

Added lines #L29 - L30 were not covered by tests
end
end
return stream
end

default_stream() = HIPStream(convert(hipStream_t, C_NULL), :normal, device())
default_stream() = HIPStream(C_NULL, :normal, device(), HIPContext())

Check warning on line 36 in src/hip/stream.jl

View check run for this annotation

Codecov / codecov/patch

src/hip/stream.jl#L36

Added line #L36 was not covered by tests

"""
HIPStream(stream::hipStream_t)

Create HIPStream from `hipStream_t` handle.
Device is the default device that's currently in use.
"""
HIPStream(stream::hipStream_t) = HIPStream(stream, priority(stream), device())
function HIPStream(stream::hipStream_t)
d = device()
HIPStream(stream, priority(stream), d, HIPContext(d))

Check warning on line 46 in src/hip/stream.jl

View check run for this annotation

Codecov / codecov/patch

src/hip/stream.jl#L44-L46

Added lines #L44 - L46 were not covered by tests
end

function isdone(stream::HIPStream)
query = hipStreamQuery(stream)
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/Runtime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ module Mem

import AMDGPU
import AMDGPU: HIP, HSA, Runtime
import .HIP: HIPDevice
import .HIP: HIPDevice, HIPContext
import .Runtime: ROCDim, ROCDim3

abstract type AbstractAMDBuffer end
Expand Down
32 changes: 21 additions & 11 deletions src/runtime/memory/hip.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,19 @@
end

struct HIPBuffer <: AbstractAMDBuffer
device::HIPDevice
device::HIPDevice # TODO not used?
ctx::HIPContext
ptr::Ptr{Cvoid}
bytesize::Int
own::Bool
end

function HIPBuffer(bytesize; stream::HIP.HIPStream)
dev = stream.device
bytesize == 0 && return HIPBuffer(dev, C_NULL, 0, true)
ctx = stream.ctx
bytesize == 0 && return HIPBuffer(dev, ctx, C_NULL, 0, true)

Check warning on line 73 in src/runtime/memory/hip.jl

View check run for this annotation

Codecov / codecov/patch

src/runtime/memory/hip.jl#L72-L73

Added lines #L72 - L73 were not covered by tests

mark_pool!(dev)
# mark_pool!(dev)
pool = HIP.memory_pool(dev)

has_limit = hard_memory_limit() != typemax(UInt64)
Expand Down Expand Up @@ -106,18 +108,20 @@
@assert HIP.reserved_memory(pool) ≤ hard_memory_limit()
end

HIPBuffer(dev, ptr, bytesize, true)
HIPBuffer(dev, ctx, ptr, bytesize, true)

Check warning on line 111 in src/runtime/memory/hip.jl

View check run for this annotation

Codecov / codecov/patch

src/runtime/memory/hip.jl#L111

Added line #L111 was not covered by tests
end

HIPBuffer(ptr::Ptr{Cvoid}, bytesize::Int) =
HIPBuffer(AMDGPU.device(), ptr, bytesize, false)
function HIPBuffer(ptr::Ptr{Cvoid}, bytesize::Int)
s = AMDGPU.stream()
HIPBuffer(s.device, s.ctx, ptr, bytesize, false)

Check warning on line 116 in src/runtime/memory/hip.jl

View check run for this annotation

Codecov / codecov/patch

src/runtime/memory/hip.jl#L114-L116

Added lines #L114 - L116 were not covered by tests
end

Base.unsafe_convert(::Type{Ptr{T}}, buf::HIPBuffer) where T =
convert(Ptr{T}, buf.ptr)

function view(buf::HIPBuffer, bytesize::Int)
bytesize > buf.bytesize && throw(BoundsError(buf, bytesize))
HIPBuffer(buf.device, buf.ptr + bytesize, buf.bytesize - bytesize, buf.own)
HIPBuffer(buf.device, buf.ctx, buf.ptr + bytesize, buf.bytesize - bytesize, buf.own)

Check warning on line 124 in src/runtime/memory/hip.jl

View check run for this annotation

Codecov / codecov/patch

src/runtime/memory/hip.jl#L124

Added line #L124 was not covered by tests
end

function free(buf::HIPBuffer; stream::HIP.HIPStream)
Expand Down Expand Up @@ -152,13 +156,17 @@

struct HostBuffer <: AbstractAMDBuffer
device::HIPDevice
ctx::HIPContext
ptr::Ptr{Cvoid}
dev_ptr::Ptr{Cvoid}
bytesize::Int
own::Bool
end

HostBuffer() = HostBuffer(AMDGPU.device(), C_NULL, C_NULL, 0, true)
function HostBuffer()
s = AMDGPU.stream()
HostBuffer(s.device, s.ctx, C_NULL, C_NULL, 0, true)

Check warning on line 168 in src/runtime/memory/hip.jl

View check run for this annotation

Codecov / codecov/patch

src/runtime/memory/hip.jl#L166-L168

Added lines #L166 - L168 were not covered by tests
end

function HostBuffer(bytesize::Integer, flags = 0)
bytesize == 0 && return HostBuffer()
Expand All @@ -167,19 +175,21 @@
HIP.hipHostMalloc(ptr_ref, bytesize, flags) |> HIP.check
ptr = ptr_ref[]
dev_ptr = get_device_ptr(ptr)
HostBuffer(AMDGPU.device(), ptr, dev_ptr, bytesize, true)
s = AMDGPU.stream()
HostBuffer(s.device, s.ctx, ptr, dev_ptr, bytesize, true)

Check warning on line 179 in src/runtime/memory/hip.jl

View check run for this annotation

Codecov / codecov/patch

src/runtime/memory/hip.jl#L178-L179

Added lines #L178 - L179 were not covered by tests
end

function HostBuffer(ptr::Ptr{Cvoid}, sz::Integer)
HIP.hipHostRegister(ptr, sz, HIP.hipHostRegisterMapped) |> HIP.check
dev_ptr = get_device_ptr(ptr)
HostBuffer(AMDGPU.device(), ptr, dev_ptr, sz, false)
s = AMDGPU.stream()
HostBuffer(s.device, s.ctx, ptr, dev_ptr, sz, false)

Check warning on line 186 in src/runtime/memory/hip.jl

View check run for this annotation

Codecov / codecov/patch

src/runtime/memory/hip.jl#L185-L186

Added lines #L185 - L186 were not covered by tests
end

function view(buf::HostBuffer, bytesize::Int)
bytesize > buf.bytesize && throw(BoundsError(buf, bytesize))
HostBuffer(
buf.device,
buf.device, buf.ctx,
buf.ptr + bytesize, buf.dev_ptr + bytesize,
buf.bytesize - bytesize, buf.own)
end
Expand Down
18 changes: 11 additions & 7 deletions src/tls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@
HIP.context!(ctx)
task_local_state!(HIP.device(), ctx)
else
old_ctx = state.ctx
old_ctx = state.context

Check warning on line 117 in src/tls.jl

View check run for this annotation

Codecov / codecov/patch

src/tls.jl#L117

Added line #L117 was not covered by tests
if old_ctx != ctx
HIP.context!(state.ctx)
HIP.context!(state.context)

Check warning on line 119 in src/tls.jl

View check run for this annotation

Codecov / codecov/patch

src/tls.jl#L119

Added line #L119 was not covered by tests
state.device = HIP.device()
state.context = ctx
end
Expand All @@ -125,11 +125,15 @@
end

function context!(f::Function, ctx::HIPContext)
old_ctx = context!(ctx)
return try
f()
finally
old_ctx ≢ nothing && old_ctx != ctx && context!(old_ctx)
if ctx.valid
old_ctx = context!(ctx)
return try
f()

Check warning on line 131 in src/tls.jl

View check run for this annotation

Codecov / codecov/patch

src/tls.jl#L128-L131

Added lines #L128 - L131 were not covered by tests
finally
old_ctx ≢ nothing && old_ctx != ctx && context!(old_ctx)

Check warning on line 133 in src/tls.jl

View check run for this annotation

Codecov / codecov/patch

src/tls.jl#L133

Added line #L133 was not covered by tests
end
else
@warn "CTX not valid"

Check warning on line 136 in src/tls.jl

View check run for this annotation

Codecov / codecov/patch

src/tls.jl#L136

Added line #L136 was not covered by tests
end
end

Expand Down
14 changes: 7 additions & 7 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,31 @@
end
println(_lib_title("rocBLAS", :rocblas; version_fn=rocBLAS.version))
if functional(:rocblas)
println(" @ $(Libdl.dlpath(librocblas))")
println(" @ $librocblas")

Check warning on line 24 in src/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/utils.jl#L24

Added line #L24 was not covered by tests
end
println(_lib_title("rocSOLVER", :rocsolver; version_fn=rocSOLVER.version))
if functional(:rocsolver)
println(" @ $(Libdl.dlpath(librocsolver))")
println(" @ $librocsolver")

Check warning on line 28 in src/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/utils.jl#L28

Added line #L28 was not covered by tests
end
println("[$(_status(functional(:rocalution)))] rocALUTION")
if functional(:rocalution)
println(" @ $(Libdl.dlpath(librocalution))")
println(" @ $librocalution")

Check warning on line 32 in src/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/utils.jl#L32

Added line #L32 was not covered by tests
end
println("[$(_status(functional(:rocsparse)))] rocSPARSE")
if functional(:rocsparse)
println(" @ $(Libdl.dlpath(librocsparse))")
println(" @ $librocsparse")

Check warning on line 36 in src/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/utils.jl#L36

Added line #L36 was not covered by tests
end
println(_lib_title("rocRAND", :rocrand; version_fn=rocRAND.version))
if functional(:rocrand)
println(" @ $(Libdl.dlpath(librocrand))")
println(" @ $librocrand")

Check warning on line 40 in src/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/utils.jl#L40

Added line #L40 was not covered by tests
end
println(_lib_title("rocFFT", :rocfft; version_fn=rocFFT.version))
if functional(:rocfft)
println(" @ $(Libdl.dlpath(librocfft))")
println(" @ $librocfft")

Check warning on line 44 in src/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/utils.jl#L44

Added line #L44 was not covered by tests
end
println(_lib_title("MIOpen", :MIOpen; version_fn=MIOpen.version))
if functional(:MIOpen)
println(" @ $(Libdl.dlpath(libMIOpen_path))")
println(" @ $libMIOpen_path")

Check warning on line 48 in src/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/utils.jl#L48

Added line #L48 was not covered by tests
end

if functional(:hip)
Expand Down
29 changes: 14 additions & 15 deletions test/device/output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,20 @@
@test msg == "Hello World!Goodbye World!\n"
end

#= TODO
@testset "Interpolated string" begin
inner_str = "to the"
function kernel(oc)
@rocprintln oc "Hello $inner_str World!"
nothing
end

iob = IOBuffer()
oc = OutputContext(iob)
@roc kernel(oc)
sleep(1)
@test String(take!(iob)) == "Hello to the World!\n"
end
=#
# TODO
# @testset "Interpolated string" begin
# inner_str = "to the"
# function kernel(oc)
# @rocprintln oc "Hello $inner_str World!"
# nothing
# end

# iob = IOBuffer()
# oc = OutputContext(iob)
# @roc kernel(oc)
# sleep(1)
# @test String(take!(iob)) == "Hello to the World!\n"
# end
end

@testset "@rocprintf" begin
Expand Down
4 changes: 3 additions & 1 deletion test/device_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ include("device/array.jl")
include("device/vadd.jl")
include("device/memory.jl")
include("device/indexing.jl")
include("device/math.jl")
include("device/wavefront.jl")
include("device/synchronization.jl")
include("device/execution_control.jl")
include("device/exceptions.jl")

# TODO https://github.com/JuliaGPU/AMDGPU.jl/issues/546
include("device/math.jl")

end
31 changes: 10 additions & 21 deletions test/hip_tests.jl → test/hip_core_tests.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@testitem "hip" begin
@testitem "hip - core" begin

using Test
using LinearAlgebra
Expand All @@ -10,26 +10,6 @@ import AMDGPU: @allowscalar
Random.seed!(1)
AMDGPU.allowscalar(false)

if AMDGPU.functional(:rocblas)
include("rocarray/blas.jl")
end
if AMDGPU.functional(:MIOpen)
include("dnn/miopen.jl")
end
if AMDGPU.functional(:rocsolver)
include("rocarray/solver.jl")
end
if AMDGPU.functional(:rocsparse)
include("rocsparse/rocsparse.jl")
end
if AMDGPU.functional(:rocrand)
include("rocarray/random.jl")
end
# TODO rocFFT tests crash Windows due to access violation
if Sys.islinux() && AMDGPU.functional(:rocfft)
include("rocarray/fft.jl")
end

@testset "AMDGPU.@elapsed" begin
xgpu = AMDGPU.rand(Float32, 100)
t = AMDGPU.@elapsed xgpu .+= 1
Expand All @@ -52,5 +32,14 @@ if length(AMDGPU.devices()) > 1
end
end

if AMDGPU.functional(:rocblas)
include("rocarray/blas.jl")
end
if AMDGPU.functional(:MIOpen)
include("dnn/miopen.jl")
end
if AMDGPU.functional(:rocrand)
include("rocarray/random.jl")
end

end
24 changes: 24 additions & 0 deletions test/hip_extra_tests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
@testitem "hip - extra" begin

using Test
using LinearAlgebra
using Random

using AMDGPU: HIP, Runtime, Device, Mem
import AMDGPU: @allowscalar

Random.seed!(1)
AMDGPU.allowscalar(false)

if AMDGPU.functional(:rocsolver)
include("rocarray/solver.jl")
end
if AMDGPU.functional(:rocsparse)
include("rocsparse/rocsparse.jl")
end
# TODO rocFFT tests crash Windows due to access violation
if Sys.islinux() && AMDGPU.functional(:rocfft)
include("rocarray/fft.jl")
end

end
Loading