Skip to content

Commit 3ee97c9

Browse files
committed
Use the block heuristic when determining a launch configuration.
1 parent 4cdb50b commit 3ee97c9

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

src/device/execution.jl

+2 -1
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,8 @@ end
9595
function launch_configuration(backend::AbstractGPUBackend, heuristic;
9696
elements::Int, elements_per_thread::Int)
9797
threads = clamp(elements, 1, heuristic.threads)
98-
blocks = max(cld(elements, threads), 1)
98+
blocks = clamp(cld(elements, threads), elements, heuristic.blocks)
99+
threads = cld(elements, blocks)
99100

100101
if elements_per_thread > 1 && blocks > heuristic.blocks
101102
# we want to launch more blocks than required, so prefer a grid-stride loop instead

test/testsuite/gpuinterface.jl

+8 -4
Original file line number | Diff line number | Diff line change
@@ -5,24 +5,28 @@
55
x = AT(Vector{Int}(undef, N))
66
x .= 0
77
gpu_call(x) do ctx, x
8-
x[linear_index(ctx)] = 2
8+
i = @linearidx x
9+
x[i] = 2
910
return
1011
end
1112
@test all(x-> x == 2, Array(x))
1213

1314
gpu_call(x; elements=N) do ctx, x
14-
x[linear_index(ctx)] = 2
15+
i = @linearidx x
16+
x[i] = 2
1517
return
1618
end
1719
@test all(x-> x == 2, Array(x))
1820
gpu_call(x; threads=2, blocks=(N ÷ 2)) do ctx, x
19-
x[linear_index(ctx)] = threadidx(ctx)
21+
i = @linearidx x
22+
x[i] = threadidx(ctx)
2023
return
2124
end
2225
@test Array(x) == [1,2,1,2,1,2,1,2,1,2]
2326

2427
gpu_call(x; threads=2, blocks=(N ÷ 2)) do ctx, x
25-
x[linear_index(ctx)] = blockidx(ctx)
28+
i = @linearidx x
29+
x[i] = blockidx(ctx)
2630
return
2731
end
2832
@test Array(x) == [1, 1, 2, 2, 3, 3, 4, 4, 5, 5]

0 commit comments

Comments
 (0)