Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

task switch not allowed from inside gc finalizer #697

Open
jariji opened this issue Nov 16, 2024 · 0 comments
Open

task switch not allowed from inside gc finalizer #697

jariji opened this issue Nov 16, 2024 · 0 comments

Comments

@jariji
Copy link
Contributor

jariji commented Nov 16, 2024

I don't have a reproducer yet, but when I run my function in parallel under OhMyThreads.tmap instead of Base.map, one of the threads fails.

@maleadt suggested it's because of behavior in AMDGPU.jl:

it shouldn't take locks from functionality that is used in finalizers (memory_stats)

ERROR: TaskFailedException
Stacktrace:
  [1] wait
    @ ./task.jl:352 [inlined]
  [2] fetch
    @ ./task.jl:372 [inlined]
  [3] fetch
    @ ~/.julia/packages/StableTasks/3CrzR/src/internals.jl:9 [inlined]
  [4] _mapreduce(f::typeof(fetch), op::typeof(append!!), ::IndexLinear, A::Vector{StableTasks.StableTask{Any}})
    @ Base ./reduce.jl:440
  [5] _mapreduce_dim(f::Function, op::Function, ::Base._InitialValue, A::Vector{StableTasks.StableTask{Any}}, ::Colon)
    @ Base ./reducedim.jl:365
  [6] mapreduce(f::Function, op::Function, A::Vector{StableTasks.StableTask{Any}})
    @ Base ./reducedim.jl:357
  [7] _tmapreduce(f::Function, op::Function, Arrs::Tuple{…}, ::Type{…}, scheduler::OhMyThreads.Schedulers.DynamicScheduler{…}, mapreduce_kwargs::@NamedTuple{})
    @ OhMyThreads.Implementation ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:95
  [8] #tmapreduce#21
    @ ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:67 [inlined]
  [9] _tmap(::OhMyThreads.Schedulers.DynamicScheduler{OhMyThreads.Schedulers.FixedCount}, ::Function, ::UnitRange{Int64})
    @ OhMyThreads.Implementation ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:435
 [10] tmap(::Function, ::UnitRange{Int64}; scheduler::OhMyThreads.Schedulers.NotGiven, kwargs::@Kwargs{})
    @ OhMyThreads.Implementation ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:357
 [11] tmap(::Function, ::UnitRange{Int64})
    @ OhMyThreads.Implementation ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:322
 [12] top-level scope
    @ ./timing.jl:279 [inlined]
 [13] top-level scope


nested task error: schedule: Task not runnable
    Stacktrace:
      [1] error(s::String)
        @ Base ./error.jl:35
      [2] schedule(t::Task, arg::Any; error::Bool)
        @ Base ./task.jl:851
      [3] schedule
        @ ./task.jl:849 [inlined]
      [4] notify(c::Base.GenericCondition{Base.Threads.SpinLock}, arg::Any, all::Bool, error::Bool)
        @ Base ./condition.jl:154
      [5] notify (repeats 2 times)
        @ ./condition.jl:148 [inlined]
      [6] (::Base.var"#notifywaiters#649")(rl::ReentrantLock)
        @ Base ./lock.jl:187
      [7] (::Base.var"#_unlock#648")(rl::ReentrantLock)
        @ Base ./lock.jl:183
      [8] unlock
        @ ./lock.jl:177 [inlined]
      [9] macro expansion
        @ ./lock.jl:269 [inlined]
     [10] lock(f::AMDGPU.var"#27#29"{HIPDevice}, x::AMDGPU.LockedObject{Dict{Int64, AMDGPU.MemoryStats}})
        @ AMDGPU ~/.julia/packages/AMDGPU/LXtMw/src/AMDGPU.jl:42
     [11] memory_stats (repeats 2 times)
        @ ~/.julia/packages/AMDGPU/LXtMw/src/memory.jl:122 [inlined]
     [12] maybe_collect(; blocking::Bool)
        @ AMDGPU ~/.julia/packages/AMDGPU/LXtMw/src/memory.jl:141
     [13] maybe_collect
        @ ~/.julia/packages/AMDGPU/LXtMw/src/memory.jl:138 [inlined]
     [14] AMDGPU.Runtime.Mem.HIPBuffer(bytesize::Int64; stream::HIPStream)
        @ AMDGPU.Runtime.Mem ~/.julia/packages/AMDGPU/LXtMw/src/runtime/memory/hip.jl:37
     [15] HIPBuffer
        @ ~/.julia/packages/AMDGPU/LXtMw/src/runtime/memory/hip.jl:33 [inlined]
     [16] pool_alloc
        @ ~/.julia/packages/AMDGPU/LXtMw/src/memory.jl:261 [inlined]
     [17] ROCArray
        @ ~/.julia/packages/AMDGPU/LXtMw/src/array.jl:10 [inlined]
     [18] similar
        @ ./abstractarray.jl:877 [inlined]
     [19] similar
        @ ~/.julia/packages/AMDGPU/LXtMw/src/broadcast.jl:18 [inlined]
     [20] _mapreduce(::var"#44#45"{…}, ::typeof(+), ::ROCArray{…}, ::ROCArray{…}; dims::Colon, init::Nothing)
        @ GPUArrays ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:62
     [21] _mapreduce
        @ ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:33 [inlined]
     [22] mapreduce
        @ ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:28 [inlined]

running finalizer: ErrorException("task switch not allowed from inside gc finalizer")
ijl_error at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/rtutils.c:41
ijl_switch at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/task.c:636
try_yieldto at ./task.jl:921
wait at ./task.jl:995
#wait#645 at ./condition.jl:130
wait at ./condition.jl:125 [inlined]
slowlock at ./lock.jl:156
lock at ./lock.jl:147 [inlined]
macro expansion at ./lock.jl:265 [inlined]
lock at /home/user/.julia/packages/AMDGPU/LXtMw/src/AMDGPU.jl:42
memory_stats at /home/user/.julia/packages/AMDGPU/LXtMw/src/memory.jl:122 [inlined]
#free#9 at /home/user/.julia/packages/AMDGPU/LXtMw/src/runtime/memory/hip.jl:79 [inlined]
free at /home/user/.julia/packages/AMDGPU/LXtMw/src/runtime/memory/hip.jl:74
unknown function (ip: 0x7fe637f1d4a9)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
#34 at /home/user/.julia/packages/AMDGPU/LXtMw/src/memory.jl:272
context! at /home/user/.julia/packages/AMDGPU/LXtMw/src/tls.jl:131
_pool_free at /home/user/.julia/packages/AMDGPU/LXtMw/src/memory.jl:272
pool_free at /home/user/.julia/packages/AMDGPU/LXtMw/src/memory.jl:265
unknown function (ip: 0x7fe637f1d175)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
release at /home/user/.julia/packages/GPUArrays/qt4ax/src/host/abstractarray.jl:42
unsafe_free! at /home/user/.julia/packages/GPUArrays/qt4ax/src/host/abstractarray.jl:91 [inlined]
unsafe_free! at /home/user/.julia/packages/AMDGPU/LXtMw/src/array.jl:24
unknown function (ip: 0x7fe636d42a55)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
run_finalizer at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gc.c:318
jl_gc_run_finalizers_in_list at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gc.c:408
run_finalizers at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gc.c:454
enable_finalizers at ./gcutils.jl:157 [inlined]
unlock at ./locks-mt.jl:68 [inlined]
multiq_deletemin at ./partr.jl:168
trypoptask at ./task.jl:977
jfptr_trypoptask_75364.1 at /nix/store/x044gz5qmcy25gm3cjil3kg0mv5jnf5p-julia-bin-1.10.3/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
get_next_task at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/partr.c:329 [inlined]
ijl_task_get_next at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/partr.c:382
poptask at ./task.jl:985
wait at ./task.jl:994
task_done_hook at ./task.jl:675
jfptr_task_done_hook_75287.1 at /nix/store/x044gz5qmcy25gm3cjil3kg0mv5jnf5p-julia-bin-1.10.3/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
jl_finish_task at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/task.c:320
start_task at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/task.c:1249
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant