diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl
index eaa24dd0c..a29533b2b 100644
--- a/src/KernelAbstractions.jl
+++ b/src/KernelAbstractions.jl
@@ -430,6 +430,15 @@ end
     end
 end
 
+###
+# Extras
+# - LoopInfo
+# - Timeline
+###
+
+include("extras/extras.jl")
+import .Extras.Timeline
+
 ###
 # Backends/Implementation
 ###
@@ -442,11 +451,4 @@ include("backends/cpu.jl")
 @init @require CUDAnative="be33ccc6-a3ff-5ff2-a52e-74243cff1e17" begin
     include("backends/cuda.jl")
 end
-
-###
-# Extras
-# - LoopInfo
-###
-
-include("extras/extras.jl")
 end #module
diff --git a/src/backends/cpu.jl b/src/backends/cpu.jl
index 1488d53a1..b0345384c 100644
--- a/src/backends/cpu.jl
+++ b/src/backends/cpu.jl
@@ -8,8 +8,11 @@ end
 
 function Event(f, args...; dependencies=nothing, progress=nothing)
     T = Threads.@spawn begin
-        wait(MultiEvent(dependencies), progress)
-        f(args...)
+        Timeline.@range "Event($(nameof(f)))" begin
+            wait(MultiEvent(dependencies), progress)
+            Timeline.mark("Event($(nameof(f))) waiting done")
+            f(args...)
+        end
     end
     return CPUEvent(T)
 end
@@ -79,6 +82,7 @@ function __run(obj, ndrange, iterspace, args, ::Val{dynamic}) where dynamic
         Nthreads = N
         len, rem = 1, 0
     end
+    Timeline.@range string(nameof(obj.f)) begin
     if Nthreads == 1
         __thread_run(1, len, rem, obj, ndrange, iterspace, args, Val(dynamic))
     else
@@ -86,6 +90,7 @@ function __run(obj, ndrange, iterspace, args, ::Val{dynamic}) where dynamic
             Threads.@spawn __thread_run(tid, len, rem, obj, ndrange, iterspace, args, Val(dynamic))
         end
     end
+    end # Timeline
     return nothing
 end
 
diff --git a/src/extras/extras.jl b/src/extras/extras.jl
index 5a2534080..a9e088fee 100644
--- a/src/extras/extras.jl
+++ b/src/extras/extras.jl
@@ -4,4 +4,8 @@ include("loopinfo.jl")
 using .LoopInfo
 export @unroll
 
+include("timeline.jl")
+using .Timeline
+export Timeline
+
 end # module
diff --git a/src/extras/timeline.jl b/src/extras/timeline.jl
new file mode 100644
index 000000000..c21e6bcfa
--- /dev/null
+++ b/src/extras/timeline.jl
@@ -0,0 +1,150 @@
+module Timeline
+
+using Requires
+export @range, mark
+
+# Fallback backend: writes timeline records as text lines to a per-process
+# `ka-$pid.nvtxt` file, and only when ENV contains KERNELABSTRACTIONS_TIMELINE.
+module NVTXT
+    const LOG_FILE=Ref{IOStream}()
+    const SHOULD_LOG=Ref{Bool}(false)
+
+    # Turn logging on and open the log file only if the environment variable is present.
+    function __init__()
+        if haskey(ENV, "KERNELABSTRACTIONS_TIMELINE")
+            SHOULD_LOG[] = true
+        else
+            SHOULD_LOG[] = false
+            return
+        end
+        pid = Libc.getpid()
+        LOG_FILE[] = open("ka-$pid.nvtxt", "w")
+        initialize()
+        atexit() do
+            close(LOG_FILE[])
+        end
+    end
+
+    # Write the header: display name plus the @RangeStartEnd, @RangePush, @RangePop
+    # and @Marker declarations whose column order the functions below follow.
+    function initialize()
+        SHOULD_LOG[] || return
+        io = LOG_FILE[]
+        pid = Libc.getpid()
+        print(io, """
+        SetFileDisplayName, KernelAbstractions
+        @RangeStartEnd, Start, End, ThreadId, Message
+        ProcessId = $pid
+        CategoryId = 1
+        Color = Blue
+        TimeBase = Manual
+        @RangePush, Time, ThreadId, Message
+        ProcessId = $pid
+        CategoryId = 1
+        Color = Blue
+        TimeBase = Manual
+        @RangePop, Time, ThreadId
+        ProcessId = $pid
+        TimeBase = Manual
+        @Marker, Time, ThreadId, Message
+        ProcessId = $pid
+        CategoryId = 1
+        Color = Blue
+        TimeBase = Manual
+        """)
+    end
+
+    # NOTE(review): push_range/pop_range write the timestamp with `print` (decimal) while
+    # end_range/mark use `show` (hex for UInt64); confirm which form the reader expects.
+    # Emit a "RangePush" record: timestamp, thread id, quoted message.
+    function push_range(msg)
+        SHOULD_LOG[] || return
+        time = time_ns()
+        io = LOG_FILE[]
+        print(io, "RangePush, ")
+        print(io, time)
+        println(io, ", ", Base.Threads.threadid(), ", \"", msg, "\"")
+    end
+
+    # Emit a "RangePop" record: timestamp, thread id.
+    function pop_range()
+        SHOULD_LOG[] || return
+        time = time_ns()
+        io = LOG_FILE[]
+        print(io, "RangePop, ")
+        print(io, time)
+        println(io, ", ", Base.Threads.threadid())
+    end
+
+    # Start time and label of an open range; held until end_range writes the record.
+    struct Range
+        start::UInt64
+        msg::String
+    end
+
+    # start_range only records the start time; end_range writes the "RangeStartEnd" line.
+    start_range(msg::String) = Range(time_ns(), msg)
+    function end_range(r::Range)
+        SHOULD_LOG[] || return
+        time = time_ns()
+        io = LOG_FILE[]
+        print(io, "RangeStartEnd, ")
+        show(io, r.start)
+        print(io, ", ")
+        show(io, time)
+        println(io, ", ", Base.Threads.threadid(), ", \"", r.msg, "\"")
+    end
+
+    # Emit a "Marker" record: timestamp, thread id, quoted message.
+    function mark(msg::String)
+        SHOULD_LOG[] || return
+        time = time_ns()
+        io = LOG_FILE[]
+        print(io, "Marker, ")
+        show(io, time)
+        println(io, ", ", Base.Threads.threadid(), ", \"", msg, "\"")
+    end
+end # NVTXT
+
+# Default hooks; the @require block below redefines them to call CUDAnative.NVTX.
+_mark(msg) = NVTXT.mark(msg)
+_push_range(msg) = NVTXT.push_range(msg)
+_pop_range() = NVTXT.pop_range()
+_start_range(msg) = NVTXT.start_range(msg)
+_end_range(r) = NVTXT.end_range(r)
+
+@init @require CUDAnative="be33ccc6-a3ff-5ff2-a52e-74243cff1e17" begin
+    # replace implementations
+    import CUDAnative.NVTX
+
+    _mark(msg) = NVTX.mark(msg)
+    _push_range(msg) = NVTX.push_range(msg)
+    _pop_range() = NVTX.pop_range()
+    _start_range(msg) = NVTX.start_range(msg)
+    _end_range(r) = NVTX.end_range(r)
+end
+
+# Routed through invokelatest, presumably so callers see hooks redefined after load.
+import Base: invokelatest
+mark(msg) = invokelatest(_mark, msg)
+push_range(msg) = invokelatest(_push_range, msg)
+pop_range() = invokelatest(_pop_range)
+start_range(msg) = invokelatest(_start_range, msg)
+end_range(r) = invokelatest(_end_range, r)
+
+"""
+    @range "msg" ex
+
+Start a range labelled `msg`, evaluate `ex`, end the range, and return the value of `ex`.
+The range is not ended if `ex` throws. See also [`start_range`](@ref), [`end_range`](@ref).
+"""
+macro range(msg, ex)
+    quote
+        local range = $start_range($(esc(msg)))
+        local ret = $(esc(ex))
+        $end_range(range)
+        ret
+    end
+end
+
+end