We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
I don't have a reproducer yet, but when I run my function in parallel under `OhMyThreads.tmap` instead of `Base.map`, one of the threads fails.
OhMyThreads.tmap
@maleadt suggested it's because of behavior in AMDGPU.jl:
it shouldn't take locks in functionality that is used in finalizers (`memory_stats`)
ERROR: TaskFailedException Stacktrace: [1] wait @ ./task.jl:352 [inlined] [2] fetch @ ./task.jl:372 [inlined] [3] fetch @ ~/.julia/packages/StableTasks/3CrzR/src/internals.jl:9 [inlined] [4] _mapreduce(f::typeof(fetch), op::typeof(append!!), ::IndexLinear, A::Vector{StableTasks.StableTask{Any}}) @ Base ./reduce.jl:440 [5] _mapreduce_dim(f::Function, op::Function, ::Base._InitialValue, A::Vector{StableTasks.StableTask{Any}}, ::Colon) @ Base ./reducedim.jl:365 [6] mapreduce(f::Function, op::Function, A::Vector{StableTasks.StableTask{Any}}) @ Base ./reducedim.jl:357 [7] _tmapreduce(f::Function, op::Function, Arrs::Tuple{…}, ::Type{…}, scheduler::OhMyThreads.Schedulers.DynamicScheduler{…}, mapreduce_kwargs::@NamedTuple{}) @ OhMyThreads.Implementation ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:95 [8] #tmapreduce#21 @ ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:67 [inlined] [9] _tmap(::OhMyThreads.Schedulers.DynamicScheduler{OhMyThreads.Schedulers.FixedCount}, ::Function, ::UnitRange{Int64}) @ OhMyThreads.Implementation ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:435 [10] tmap(::Function, ::UnitRange{Int64}; scheduler::OhMyThreads.Schedulers.NotGiven, kwargs::@Kwargs{}) @ OhMyThreads.Implementation ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:357 [11] tmap(::Function, ::UnitRange{Int64}) @ OhMyThreads.Implementation ~/.julia/packages/OhMyThreads/OK01i/src/implementation.jl:322 [12] top-level scope @ ./timing.jl:279 [inlined] [13] top-level scope nested task error: schedule: Task not runnable Stacktrace: [1] error(s::String) @ Base ./error.jl:35 [2] schedule(t::Task, arg::Any; error::Bool) @ Base ./task.jl:851 [3] schedule @ ./task.jl:849 [inlined] [4] notify(c::Base.GenericCondition{Base.Threads.SpinLock}, arg::Any, all::Bool, error::Bool) @ Base ./condition.jl:154 [5] notify (repeats 2 times) @ ./condition.jl:148 [inlined] [6] (::Base.var"#notifywaiters#649")(rl::ReentrantLock) @ Base ./lock.jl:187 [7] 
(::Base.var"#_unlock#648")(rl::ReentrantLock) @ Base ./lock.jl:183 [8] unlock @ ./lock.jl:177 [inlined] [9] macro expansion @ ./lock.jl:269 [inlined] [10] lock(f::AMDGPU.var"#27#29"{HIPDevice}, x::AMDGPU.LockedObject{Dict{Int64, AMDGPU.MemoryStats}}) @ AMDGPU ~/.julia/packages/AMDGPU/LXtMw/src/AMDGPU.jl:42 [11] memory_stats (repeats 2 times) @ ~/.julia/packages/AMDGPU/LXtMw/src/memory.jl:122 [inlined] [12] maybe_collect(; blocking::Bool) @ AMDGPU ~/.julia/packages/AMDGPU/LXtMw/src/memory.jl:141 [13] maybe_collect @ ~/.julia/packages/AMDGPU/LXtMw/src/memory.jl:138 [inlined] [14] AMDGPU.Runtime.Mem.HIPBuffer(bytesize::Int64; stream::HIPStream) @ AMDGPU.Runtime.Mem ~/.julia/packages/AMDGPU/LXtMw/src/runtime/memory/hip.jl:37 [15] HIPBuffer @ ~/.julia/packages/AMDGPU/LXtMw/src/runtime/memory/hip.jl:33 [inlined] [16] pool_alloc @ ~/.julia/packages/AMDGPU/LXtMw/src/memory.jl:261 [inlined] [17] ROCArray @ ~/.julia/packages/AMDGPU/LXtMw/src/array.jl:10 [inlined] [18] similar @ ./abstractarray.jl:877 [inlined] [19] similar @ ~/.julia/packages/AMDGPU/LXtMw/src/broadcast.jl:18 [inlined] [20] _mapreduce(::var"#44#45"{…}, ::typeof(+), ::ROCArray{…}, ::ROCArray{…}; dims::Colon, init::Nothing) @ GPUArrays ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:62 [21] _mapreduce @ ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:33 [inlined] [22] mapreduce @ ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:28 [inlined] running finalizer: ErrorException("task switch not allowed from inside gc finalizer") ijl_error at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/rtutils.c:41 ijl_switch at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/task.c:636 try_yieldto at ./task.jl:921 wait at ./task.jl:995 #wait#645 at ./condition.jl:130 wait at ./condition.jl:125 [inlined] slowlock at ./lock.jl:156 lock at ./lock.jl:147 [inlined] macro expansion at ./lock.jl:265 [inlined] lock at /home/user/.julia/packages/AMDGPU/LXtMw/src/AMDGPU.jl:42 
memory_stats at /home/user/.julia/packages/AMDGPU/LXtMw/src/memory.jl:122 [inlined] #free#9 at /home/user/.julia/packages/AMDGPU/LXtMw/src/runtime/memory/hip.jl:79 [inlined] free at /home/user/.julia/packages/AMDGPU/LXtMw/src/runtime/memory/hip.jl:74 unknown function (ip: 0x7fe637f1d4a9) _jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined] ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077 #34 at /home/user/.julia/packages/AMDGPU/LXtMw/src/memory.jl:272 context! at /home/user/.julia/packages/AMDGPU/LXtMw/src/tls.jl:131 _pool_free at /home/user/.julia/packages/AMDGPU/LXtMw/src/memory.jl:272 pool_free at /home/user/.julia/packages/AMDGPU/LXtMw/src/memory.jl:265 unknown function (ip: 0x7fe637f1d175) _jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined] ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077 release at /home/user/.julia/packages/GPUArrays/qt4ax/src/host/abstractarray.jl:42 unsafe_free! at /home/user/.julia/packages/GPUArrays/qt4ax/src/host/abstractarray.jl:91 [inlined] unsafe_free! 
at /home/user/.julia/packages/AMDGPU/LXtMw/src/array.jl:24 unknown function (ip: 0x7fe636d42a55) _jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined] ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077 run_finalizer at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gc.c:318 jl_gc_run_finalizers_in_list at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gc.c:408 run_finalizers at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gc.c:454 enable_finalizers at ./gcutils.jl:157 [inlined] unlock at ./locks-mt.jl:68 [inlined] multiq_deletemin at ./partr.jl:168 trypoptask at ./task.jl:977 jfptr_trypoptask_75364.1 at /nix/store/x044gz5qmcy25gm3cjil3kg0mv5jnf5p-julia-bin-1.10.3/lib/julia/sys.so (unknown line) _jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined] ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077 get_next_task at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/partr.c:329 [inlined] ijl_task_get_next at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/partr.c:382 poptask at ./task.jl:985 wait at ./task.jl:994 task_done_hook at ./task.jl:675 jfptr_task_done_hook_75287.1 at /nix/store/x044gz5qmcy25gm3cjil3kg0mv5jnf5p-julia-bin-1.10.3/lib/julia/sys.so (unknown line) _jl_invoke at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined] ijl_apply_generic at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/gf.c:3077 jl_apply at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined] jl_finish_task at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/task.c:320 start_task at /cache/build/builder-amdci4-2/julialang/julia-release-1-dot-10/src/task.c:1249
The text was updated successfully, but these errors were encountered:
No branches or pull requests
I don't have a reproducer yet but when I run my function in parallel under
OhMyThreads.tmap
instead of `Base.map`, one of the threads failed. @maleadt suggested it's because of behavior in AMDGPU.jl:
The text was updated successfully, but these errors were encountered: