Skip to content

Commit

Permalink
Check context is valid before freeing streams, arrays. (#552)
Browse files Browse the repository at this point in the history
  • Loading branch information
pxl-th authored Nov 28, 2023
1 parent f328c9c commit 192214b
Show file tree
Hide file tree
Showing 13 changed files with 125 additions and 86 deletions.
6 changes: 4 additions & 2 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ end

# Passed to `DataRef` to handle freeing.
function _free_buf(buf, stream_ordered::Bool)
s = stream_ordered ? AMDGPU.stream() : AMDGPU.default_stream()
Mem.free(buf; stream=s)
context!(buf.ctx) do
s = stream_ordered ? AMDGPU.stream() : AMDGPU.default_stream()
Mem.free(buf; stream=s)
end
end

unsafe_free!(x::ROCArray) = GPUArrays.unsafe_free!(x.buf, true)
Expand Down
1 change: 1 addition & 0 deletions src/fft/fft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ function unsafe_execute!(
plan::cROCFFTPlan{T,K,false,N}, X::ROCArray{T,N}, Y::ROCArray{T},
) where {T,N,K}
X = copy(X) # since input array can also be modified
# TODO on 1.11 we need to manually cast `pointer(X)` to `Ptr{Cvoid}`.
rocfft_execute(plan, [pointer(X),], [pointer(Y),], plan.execution_info)
end

Expand Down
10 changes: 4 additions & 6 deletions src/hip/HIP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ end

mutable struct HIPContext
context::hipContext_t
valid::Bool
end
const CONTEXTS = AMDGPU.LockedObject(Dict{HIPDevice,HIPContext}())

Expand All @@ -31,10 +32,11 @@ function HIPContext(device::HIPDevice)
get!(contexts, device) do
context_ref = Ref{hipContext_t}()
hipCtxCreate(context_ref, Cuint(0), device.device) |> check
context = HIPContext(context_ref[])
context = HIPContext(context_ref[], true)

device!(device)
finalizer(context) do c
c.valid = false
hipCtxDestroy(c.context) |> check
end
return context
Expand All @@ -44,11 +46,7 @@ end

HIPContext(device_id::Integer) = HIPContext(HIPDevice(device_id))

function HIPContext()
context_ref = Ref{hipContext_t}()
hipCtxGetCurrent(context_ref) |> check
HIPContext(context_ref[])
end
HIPContext() = HIPContext(device())

Base.unsafe_convert(::Type{Ptr{T}}, context::HIPContext) where T =
reinterpret(Ptr{T}, context.context)
Expand Down
15 changes: 11 additions & 4 deletions src/hip/stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mutable struct HIPStream
stream::hipStream_t
priority::Symbol
device::HIPDevice
ctx::HIPContext
end

"""
Expand All @@ -22,22 +23,28 @@ function HIPStream(priority::Symbol = :normal)

stream_ref = Ref{hipStream_t}()
hipStreamCreateWithPriority(stream_ref, Cuint(0), priority_int) |> check
stream = HIPStream(stream_ref[], priority, device())
d = device()
stream = HIPStream(stream_ref[], priority, d, HIPContext(d))
finalizer(stream) do s
hipStreamDestroy(s.stream) |> check
AMDGPU.context!(s.ctx) do
hipStreamDestroy(s.stream) |> check
end
end
return stream
end

default_stream() = HIPStream(convert(hipStream_t, C_NULL), :normal, device())
default_stream() = HIPStream(C_NULL, :normal, device(), HIPContext())

"""
HIPStream(stream::hipStream_t)
Create HIPStream from `hipStream_t` handle.
Device is the default device that's currently in use.
"""
HIPStream(stream::hipStream_t) = HIPStream(stream, priority(stream), device())
function HIPStream(stream::hipStream_t)
d = device()
HIPStream(stream, priority(stream), d, HIPContext(d))
end

function isdone(stream::HIPStream)
query = hipStreamQuery(stream)
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/Runtime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ module Mem

import AMDGPU
import AMDGPU: HIP, HSA, Runtime
import .HIP: HIPDevice
import .HIP: HIPDevice, HIPContext
import .Runtime: ROCDim, ROCDim3

abstract type AbstractAMDBuffer end
Expand Down
32 changes: 21 additions & 11 deletions src/runtime/memory/hip.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,19 @@ function mark_pool!(dev::HIP.HIPDevice)
end

struct HIPBuffer <: AbstractAMDBuffer
device::HIPDevice
device::HIPDevice # TODO not used?
ctx::HIPContext
ptr::Ptr{Cvoid}
bytesize::Int
own::Bool
end

function HIPBuffer(bytesize; stream::HIP.HIPStream)
dev = stream.device
bytesize == 0 && return HIPBuffer(dev, C_NULL, 0, true)
ctx = stream.ctx
bytesize == 0 && return HIPBuffer(dev, ctx, C_NULL, 0, true)

mark_pool!(dev)
# mark_pool!(dev)
pool = HIP.memory_pool(dev)

has_limit = hard_memory_limit() != typemax(UInt64)
Expand Down Expand Up @@ -106,18 +108,20 @@ function HIPBuffer(bytesize; stream::HIP.HIPStream)
@assert HIP.reserved_memory(pool) hard_memory_limit()
end

HIPBuffer(dev, ptr, bytesize, true)
HIPBuffer(dev, ctx, ptr, bytesize, true)
end

HIPBuffer(ptr::Ptr{Cvoid}, bytesize::Int) =
HIPBuffer(AMDGPU.device(), ptr, bytesize, false)
function HIPBuffer(ptr::Ptr{Cvoid}, bytesize::Int)
s = AMDGPU.stream()
HIPBuffer(s.device, s.ctx, ptr, bytesize, false)
end

Base.unsafe_convert(::Type{Ptr{T}}, buf::HIPBuffer) where T =
convert(Ptr{T}, buf.ptr)

function view(buf::HIPBuffer, bytesize::Int)
bytesize > buf.bytesize && throw(BoundsError(buf, bytesize))
HIPBuffer(buf.device, buf.ptr + bytesize, buf.bytesize - bytesize, buf.own)
HIPBuffer(buf.device, buf.ctx, buf.ptr + bytesize, buf.bytesize - bytesize, buf.own)
end

function free(buf::HIPBuffer; stream::HIP.HIPStream)
Expand Down Expand Up @@ -152,13 +156,17 @@ end

struct HostBuffer <: AbstractAMDBuffer
device::HIPDevice
ctx::HIPContext
ptr::Ptr{Cvoid}
dev_ptr::Ptr{Cvoid}
bytesize::Int
own::Bool
end

HostBuffer() = HostBuffer(AMDGPU.device(), C_NULL, C_NULL, 0, true)
function HostBuffer()
s = AMDGPU.stream()
HostBuffer(s.device, s.ctx, C_NULL, C_NULL, 0, true)
end

function HostBuffer(bytesize::Integer, flags = 0)
bytesize == 0 && return HostBuffer()
Expand All @@ -167,19 +175,21 @@ function HostBuffer(bytesize::Integer, flags = 0)
HIP.hipHostMalloc(ptr_ref, bytesize, flags) |> HIP.check
ptr = ptr_ref[]
dev_ptr = get_device_ptr(ptr)
HostBuffer(AMDGPU.device(), ptr, dev_ptr, bytesize, true)
s = AMDGPU.stream()
HostBuffer(s.device, s.ctx, ptr, dev_ptr, bytesize, true)
end

function HostBuffer(ptr::Ptr{Cvoid}, sz::Integer)
HIP.hipHostRegister(ptr, sz, HIP.hipHostRegisterMapped) |> HIP.check
dev_ptr = get_device_ptr(ptr)
HostBuffer(AMDGPU.device(), ptr, dev_ptr, sz, false)
s = AMDGPU.stream()
HostBuffer(s.device, s.ctx, ptr, dev_ptr, sz, false)
end

function view(buf::HostBuffer, bytesize::Int)
bytesize > buf.bytesize && throw(BoundsError(buf, bytesize))
HostBuffer(
buf.device,
buf.device, buf.ctx,
buf.ptr + bytesize, buf.dev_ptr + bytesize,
buf.bytesize - bytesize, buf.own)
end
Expand Down
18 changes: 11 additions & 7 deletions src/tls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ function context!(ctx::HIPContext)
HIP.context!(ctx)
task_local_state!(HIP.device(), ctx)
else
old_ctx = state.ctx
old_ctx = state.context
if old_ctx != ctx
HIP.context!(state.ctx)
HIP.context!(state.context)
state.device = HIP.device()
state.context = ctx
end
Expand All @@ -125,11 +125,15 @@ function context!(ctx::HIPContext)
end

function context!(f::Function, ctx::HIPContext)
old_ctx = context!(ctx)
return try
f()
finally
old_ctx nothing && old_ctx != ctx && context!(old_ctx)
if ctx.valid
old_ctx = context!(ctx)
return try
f()
finally
old_ctx nothing && old_ctx != ctx && context!(old_ctx)
end
else
@warn "CTX not valid"
end
end

Expand Down
14 changes: 7 additions & 7 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,31 @@ function versioninfo(io::IO=stdout)
end
println(_lib_title("rocBLAS", :rocblas; version_fn=rocBLAS.version))
if functional(:rocblas)
println(" @ $(Libdl.dlpath(librocblas))")
println(" @ $librocblas")
end
println(_lib_title("rocSOLVER", :rocsolver; version_fn=rocSOLVER.version))
if functional(:rocsolver)
println(" @ $(Libdl.dlpath(librocsolver))")
println(" @ $librocsolver")
end
println("[$(_status(functional(:rocalution)))] rocALUTION")
if functional(:rocalution)
println(" @ $(Libdl.dlpath(librocalution))")
println(" @ $librocalution")
end
println("[$(_status(functional(:rocsparse)))] rocSPARSE")
if functional(:rocsparse)
println(" @ $(Libdl.dlpath(librocsparse))")
println(" @ $librocsparse")
end
println(_lib_title("rocRAND", :rocrand; version_fn=rocRAND.version))
if functional(:rocrand)
println(" @ $(Libdl.dlpath(librocrand))")
println(" @ $librocrand")
end
println(_lib_title("rocFFT", :rocfft; version_fn=rocFFT.version))
if functional(:rocfft)
println(" @ $(Libdl.dlpath(librocfft))")
println(" @ $librocfft")
end
println(_lib_title("MIOpen", :MIOpen; version_fn=MIOpen.version))
if functional(:MIOpen)
println(" @ $(Libdl.dlpath(libMIOpen_path))")
println(" @ $libMIOpen_path")
end

if functional(:hip)
Expand Down
29 changes: 14 additions & 15 deletions test/device/output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,20 @@
@test msg == "Hello World!Goodbye World!\n"
end

#= TODO
@testset "Interpolated string" begin
inner_str = "to the"
function kernel(oc)
@rocprintln oc "Hello $inner_str World!"
nothing
end
iob = IOBuffer()
oc = OutputContext(iob)
@roc kernel(oc)
sleep(1)
@test String(take!(iob)) == "Hello to the World!\n"
end
=#
# TODO
# @testset "Interpolated string" begin
# inner_str = "to the"
# function kernel(oc)
# @rocprintln oc "Hello $inner_str World!"
# nothing
# end

# iob = IOBuffer()
# oc = OutputContext(iob)
# @roc kernel(oc)
# sleep(1)
# @test String(take!(iob)) == "Hello to the World!\n"
# end
end

@testset "@rocprintf" begin
Expand Down
4 changes: 3 additions & 1 deletion test/device_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ include("device/array.jl")
include("device/vadd.jl")
include("device/memory.jl")
include("device/indexing.jl")
include("device/math.jl")
include("device/wavefront.jl")
include("device/synchronization.jl")
include("device/execution_control.jl")
include("device/exceptions.jl")

# TODO https://github.com/JuliaGPU/AMDGPU.jl/issues/546
include("device/math.jl")

end
31 changes: 10 additions & 21 deletions test/hip_tests.jl → test/hip_core_tests.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@testitem "hip" begin
@testitem "hip - core" begin

using Test
using LinearAlgebra
Expand All @@ -10,26 +10,6 @@ import AMDGPU: @allowscalar
Random.seed!(1)
AMDGPU.allowscalar(false)

if AMDGPU.functional(:rocblas)
include("rocarray/blas.jl")
end
if AMDGPU.functional(:MIOpen)
include("dnn/miopen.jl")
end
if AMDGPU.functional(:rocsolver)
include("rocarray/solver.jl")
end
if AMDGPU.functional(:rocsparse)
include("rocsparse/rocsparse.jl")
end
if AMDGPU.functional(:rocrand)
include("rocarray/random.jl")
end
# TODO rocFFT tests crash Windows due to access violation
if Sys.islinux() && AMDGPU.functional(:rocfft)
include("rocarray/fft.jl")
end

@testset "AMDGPU.@elapsed" begin
xgpu = AMDGPU.rand(Float32, 100)
t = AMDGPU.@elapsed xgpu .+= 1
Expand All @@ -52,5 +32,14 @@ if length(AMDGPU.devices()) > 1
end
end

if AMDGPU.functional(:rocblas)
include("rocarray/blas.jl")
end
if AMDGPU.functional(:MIOpen)
include("dnn/miopen.jl")
end
if AMDGPU.functional(:rocrand)
include("rocarray/random.jl")
end

end
24 changes: 24 additions & 0 deletions test/hip_extra_tests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
@testitem "hip - extra" begin

using Test
using LinearAlgebra
using Random

using AMDGPU: HIP, Runtime, Device, Mem
import AMDGPU: @allowscalar

Random.seed!(1)
AMDGPU.allowscalar(false)

if AMDGPU.functional(:rocsolver)
include("rocarray/solver.jl")
end
if AMDGPU.functional(:rocsparse)
include("rocsparse/rocsparse.jl")
end
# TODO rocFFT tests crash Windows due to access violation
if Sys.islinux() && AMDGPU.functional(:rocfft)
include("rocarray/fft.jl")
end

end
Loading

0 comments on commit 192214b

Please sign in to comment.