Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement NVTXT #66

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,15 @@ end
end
end

###
# Extras
# - LoopInfo
# - Timeline
###

include("extras/extras.jl")
import .Extras.Timeline

###
# Backends/Implementation
###
Expand All @@ -442,11 +451,4 @@ include("backends/cpu.jl")
# Load the CUDA backend lazily: with Requires.jl, the body of `@require` is evaluated only
# once the package with this UUID (CUDAnative) has been loaded into the session, so
# CUDAnative stays an optional dependency.
@init @require CUDAnative="be33ccc6-a3ff-5ff2-a52e-74243cff1e17" begin
include("backends/cuda.jl")
end

###
# Extras
# - LoopInfo
###

include("extras/extras.jl")
end #module
9 changes: 7 additions & 2 deletions src/backends/cpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ end

"""
    Event(f, args...; dependencies=nothing, progress=nothing)

Spawn a task that first waits on `MultiEvent(dependencies)` (passing `progress` through to
`wait`) and then calls `f(args...)`. The spawned task is wrapped in a `CPUEvent` and
returned immediately.

The whole task body is recorded as one timeline range named after `f`, with a marker
emitted once the dependencies have been satisfied.
"""
function Event(f, args...; dependencies=nothing, progress=nothing)
    T = Threads.@spawn begin
        # `@range` is a macro, so it has to be invoked as `Timeline.@range`.
        Timeline.@range "Event($(nameof(f)))" begin
            wait(MultiEvent(dependencies), progress)
            Timeline.mark("Event($(nameof(f))) waiting done")
            # Run the payload exactly once, inside the range.
            f(args...)
        end
    end
    return CPUEvent(T)
end
Expand Down Expand Up @@ -79,13 +82,15 @@ function __run(obj, ndrange, iterspace, args, ::Val{dynamic}) where dynamic
Nthreads = N
len, rem = 1, 0
end
Timeline.@range string(nameof(obj.f)) begin
if Nthreads == 1
__thread_run(1, len, rem, obj, ndrange, iterspace, args, Val(dynamic))
else
@sync for tid in 1:Nthreads
Threads.@spawn __thread_run(tid, len, rem, obj, ndrange, iterspace, args, Val(dynamic))
end
end
end # Timeline
return nothing
end

Expand Down
4 changes: 4 additions & 0 deletions src/extras/extras.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ include("loopinfo.jl")
using .LoopInfo
export @unroll

include("timeline.jl")
using .Timeline
export Timeline

end # module
135 changes: 135 additions & 0 deletions src/extras/timeline.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
module Timeline

using Requires
export @range, mark

# Text-based timeline backend. When the environment variable
# `KERNELABSTRACTIONS_TIMELINE` is set, ranges and markers are appended to a per-process
# `ka-<pid>.nvtxt` file; otherwise every entry point is a cheap no-op.
module NVTXT
    const LOG_FILE = Ref{IOStream}()
    const SHOULD_LOG = Ref{Bool}(false)

    """
        __init__()

    Enable logging iff `KERNELABSTRACTIONS_TIMELINE` is present in `ENV`. When enabled,
    open `ka-<pid>.nvtxt` in the current directory, write the file header, and register an
    `atexit` hook that closes the file.
    """
    function __init__()
        SHOULD_LOG[] = haskey(ENV, "KERNELABSTRACTIONS_TIMELINE")
        SHOULD_LOG[] || return
        pid = Libc.getpid()
        LOG_FILE[] = open("ka-$pid.nvtxt", "w")
        initialize()
        atexit() do
            # Stop logging before closing so late callers do not write to a closed stream.
            SHOULD_LOG[] = false
            close(LOG_FILE[])
        end
        return
    end

    """
        initialize()

    Write the header that declares the record layouts (`RangeStartEnd`, `RangePush`,
    `RangePop`, `Marker`) used by the functions below. Does nothing when logging is
    disabled.
    """
    function initialize()
        SHOULD_LOG[] || return
        io = LOG_FILE[]
        pid = Libc.getpid()
        print(io, """
        SetFileDisplayName, KernelAbstractions
        @RangeStartEnd, Start, End, ThreadId, Message
        ProcessId = $pid
        CategoryId = 1
        Color = Blue
        TimeBase = Manual
        @RangePush, Time, ThreadId, Message
        ProcessId = $pid
        CategoryId = 1
        Color = Blue
        TimeBase = Manual
        @RangePop, Time, ThreadId
        ProcessId = $pid
        TimeBase = Manual
        @Marker, Time, ThreadId, Message
        ProcessId = $pid
        CategoryId = 1
        Color = Blue
        TimeBase = Manual
        """)
        return
    end

    # Write one record with a single `println` call. A multi-argument `print`/`println`
    # holds the stream lock for the whole call, so records emitted concurrently from
    # several threads cannot interleave. `UInt64` timestamps go through `print`, which
    # renders them in decimal (`show` would render them as `0x…` hexadecimal).
    function emit(fields...)
        println(LOG_FILE[], fields...)
        return nothing
    end

    """
        push_range(msg)

    Log a `RangePush` record with the current `time_ns()`, the calling thread id and `msg`.
    """
    function push_range(msg)
        SHOULD_LOG[] || return
        time = time_ns()
        return emit("RangePush, ", time, ", ", Base.Threads.threadid(), ", \"", msg, "\"")
    end

    """
        pop_range()

    Log a `RangePop` record with the current `time_ns()` and the calling thread id.
    """
    function pop_range()
        SHOULD_LOG[] || return
        time = time_ns()
        return emit("RangePop, ", time, ", ", Base.Threads.threadid())
    end

    # An open range: the `time_ns()` stamp taken at its start plus its label.
    struct Range
        start::UInt64
        msg::String
    end

    """
        start_range(msg)

    Capture the current `time_ns()` together with `msg`. Nothing is written until the
    returned `Range` is passed to [`end_range`](@ref).
    """
    start_range(msg::AbstractString) = Range(time_ns(), msg)

    """
        end_range(r::Range)

    Log a `RangeStartEnd` record spanning from `r.start` to the current `time_ns()`, tagged
    with the thread id of the caller of `end_range`.
    """
    function end_range(r::Range)
        SHOULD_LOG[] || return
        time = time_ns()
        return emit("RangeStartEnd, ", r.start, ", ", time, ", ",
                    Base.Threads.threadid(), ", \"", r.msg, "\"")
    end

    """
        mark(msg)

    Log a `Marker` record with the current `time_ns()`, the calling thread id and `msg`.
    """
    function mark(msg::AbstractString)
        SHOULD_LOG[] || return
        time = time_ns()
        return emit("Marker, ", time, ", ", Base.Threads.threadid(), ", \"", msg, "\"")
    end
end # NVTXT

# Default backend hooks: each one forwards to the text-based NVTXT logger.
function _mark(msg)
    return NVTXT.mark(msg)
end

function _push_range(msg)
    return NVTXT.push_range(msg)
end

function _pop_range()
    return NVTXT.pop_range()
end

function _start_range(msg)
    return NVTXT.start_range(msg)
end

function _end_range(r)
    return NVTXT.end_range(r)
end

# Optional CUDAnative integration. With Requires.jl, the body below is evaluated only once
# the package with this UUID (CUDAnative) has been loaded.
@init @require CUDAnative="be33ccc6-a3ff-5ff2-a52e-74243cff1e17" begin
# Replace the NVTXT-backed hook implementations with ones that forward to CUDAnative's
# NVTX module. These definitions overwrite the same-signature methods defined above, and
# they are created at runtime — presumably the reason the public wrappers call the hooks
# through `invokelatest`.
import CUDAnative.NVTX

_mark(msg) = NVTX.mark(msg)
_push_range(msg) = NVTX.push_range(msg)
_pop_range() = NVTX.pop_range()
_start_range(msg) = NVTX.start_range(msg)
_end_range(r) = NVTX.end_range(r)
end

import Base: invokelatest
# Public entry points. Every call is routed through `invokelatest` so that it reaches
# whichever method of the underlying `_`-prefixed hook is the newest one defined.
function mark(msg)
    return invokelatest(_mark, msg)
end

function push_range(msg)
    return invokelatest(_push_range, msg)
end

function pop_range()
    return invokelatest(_pop_range)
end

function start_range(msg)
    return invokelatest(_start_range, msg)
end

function end_range(r)
    return invokelatest(_end_range, r)
end

"""
@range "msg" ex
Start a timeline range labelled `msg`, evaluate `ex`, end the range, and return the value of `ex`.
See also: `start_range`, `end_range`
"""
macro range(msg, ex)
    # Expands to: start a range labelled `msg`, evaluate `ex`, end the range, and yield the
    # value of `ex`.
    #
    # `ex` runs inside `try`/`finally` so the range is ended even when `ex` throws or
    # leaves early (e.g. via `return`); otherwise the range would stay open forever.
    # Caveat: `try` introduces a scope, so variables first assigned inside `ex` are local
    # to it — declare them before the `@range` block if they are needed afterwards.
    quote
        local range = $start_range($(esc(msg)))
        try
            $(esc(ex))
        finally
            $end_range(range)
        end
    end
end

end