From e69f2fdeda520f90d038d3599f1ac3c5137a92bb Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 11 Jun 2023 16:45:38 -0400 Subject: [PATCH 1/9] first commit --- Manifest.toml | 244 ++++++++++++++++++++++++++++++++++++++ src/KernelAbstractions.jl | 8 +- src/macros.jl | 5 + src/nditeration.jl | 61 +++++++--- 4 files changed, 296 insertions(+), 22 deletions(-) create mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 000000000..791f408f4 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,244 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.8.0" +manifest_format = "2.0" +project_hash = "9033168e08cf56d36e87b64ae995845922bec33c" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "76289dc51920fdc6e0013c872ba9551d54961c24" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "3.6.2" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Atomix]] +deps = ["UnsafeAtomics"] +git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" +uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" +version = "0.1.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.CEnum]] +git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.2" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.5.2+0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.JLLWrappers]] +deps = ["Preferences"] +git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.4.1" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] +git-tree-sha1 = "5007c1421563108110bbd57f63d8ad4565808818" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "5.2.0" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "1222116d7313cdefecf3d45a2bc1a89c4e7c9217" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.22+0" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.84.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.10" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.0+0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.2.1" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.20+0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.8.0" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "9673d39decc5feece56ef3940e5dafba15ba0f81" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.1.2" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "7eb1686b4f04b82f96ed7a4ea5890a4f0c7a09f1" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.0" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] +git-tree-sha1 = "832afbae2a45b4ae7e831f86965469a24d1d8a83" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.5.26" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.0" + +[[deps.Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.0" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.UnsafeAtomics]] +git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" +uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" +version = "0.2.1" + +[[deps.UnsafeAtomicsLLVM]] +deps = ["LLVM", "UnsafeAtomics"] +git-tree-sha1 = "ea37e6066bf194ab78f4e747f5245261f17a7175" +uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" +version = "0.1.2" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.12+3" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.1.1+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.48.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index 756f40f94..9de4f0497 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -489,7 +489,7 @@ end include("nditeration.jl") using .NDIteration -import .NDIteration: get +import .NDIteration: get, getrange ### # Kernel closure struct @@ -535,11 +535,13 @@ function partition(kernel, ndrange, workgroupsize) error(errmsg) end + offsets = nothing + if static_ndrange <: StaticSize if ndrange !== nothing && ndrange != get(static_ndrange) error("Static NDRange ($static_ndrange) and launch NDRange ($ndrange) differ") end - ndrange = get(static_ndrange) + ndrange, offsets = getrange(static_ndrange) end if static_workgroupsize <: StaticSize @@ -568,7 +570,7 @@ function partition(kernel, ndrange, workgroupsize) workgroupsize = CartesianIndices(workgroupsize) end - iterspace = NDRange{length(ndrange), static_blocks, static_workgroupsize}(blocks, workgroupsize) + iterspace = NDRange{length(ndrange), offsets, static_blocks, static_workgroupsize}(blocks, workgroupsize) return iterspace, dynamic end diff --git a/src/macros.jl b/src/macros.jl index e93bc386d..5078c52de 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -1,5 +1,8 @@ import MacroTools: splitdef, combinedef, isexpr, postwalk +@inline contiguousrange(range::NTuple{N, Int}, offset::NTuple{N, Int}) where N = + Tuple(1+o:r+o for (r, o) in zip(range, offset)) + function find_return(stmt) result = false postwalk(stmt) do expr @@ -54,6 +57,8 @@ function __kernel(expr, generate_cpu=true) Core.@__doc__ $name(dev) = $name(dev, $DynamicSize(), $DynamicSize()) $name(dev, size) = $name(dev, $StaticSize(size), $DynamicSize()) $name(dev, size, range) = $name(dev, $StaticSize(size), $StaticSize(range)) + $name(dev, size, range::NTuple{N}, offset::NTuple{N}) where N = + $name(dev, $StaticSize(size), $StaticSize($contiguousrange(range, offset))) function $name(dev::Dev, sz::S, range::NDRange) where {Dev, S<:$_Size, NDRange<:$_Size} if $isgpu(dev) return $construct(dev, sz, range, $gpu_name) diff --git a/src/nditeration.jl b/src/nditeration.jl index d7598ae20..aaa48595f 100644 --- a/src/nditeration.jl +++ b/src/nditeration.jl @@ -13,21 +13,35 @@ abstract type _Size end struct DynamicSize <: _Size end struct StaticSize{S} <: _Size function StaticSize{S}() where S - new{S::Tuple{Vararg{Int}}}() + new{S::Tuple{Vararg}}() end end @pure StaticSize(s::Tuple{Vararg{Int}}) = StaticSize{s}() @pure StaticSize(s::Int...) = StaticSize{s}() @pure StaticSize(s::Type{<:Tuple}) = StaticSize{tuple(s.parameters...)}() +@pure StaticSize(s::Tuple{Vararg{UnitRange{Int}}}) = StaticSize{s}() # Some @pure convenience functions for `StaticSize` @pure get(::Type{StaticSize{S}}) where {S} = S @pure get(::StaticSize{S}) where {S} = S @pure Base.getindex(::StaticSize{S}, i::Int) where {S} = i <= length(S) ? S[i] : 1 -@pure Base.ndims(::StaticSize{S}) where {S} = length(S) -@pure Base.length(::StaticSize{S}) where {S} = prod(S) +@pure Base.ndims(::StaticSize{S}) where {S} = length(S) +@pure Base.length(::StaticSize{S}) where {S} = prod(worksize.(S)) +@inline getrange(::StaticSize{S}) where {S} = worksize(S), offsets(S) +@inline getrange(::Type{StaticSize{S}}) where {S} = worksize(S), offsets(S) + +@inline worksize(i::Tuple) = worksize.(i) +@inline worksize(i::Int) = i +@inline worksize(i::UnitRange) = length(i) +@inline worksize(i::StepRange) = length(i) + +@inline offsets(i) = offsets.(i) +@inline offsets(::NTuple{N, Int}) where N = nothing +@inline offsets(::Int) = nothing +@inline offsets(i::UnitRange) = i.start - 1 +@inline offsets(i::StepRange) = i.start - 1, i.step """ NDRange @@ -36,7 +50,7 @@ Encodes a blocked iteration space. # Example ``` -ndrange = NDRange{2, DynamicSize, DynamicSize}(CartesianIndices((256, 256)), CartesianIndices((32, 32))) +ndrange = NDRange{2, nothing, DynamicSize, DynamicSize}(CartesianIndices((256, 256)), CartesianIndices((32, 32))) for block in ndrange for items in workitems(ndrange) I = expand(ndrange, block, items) @@ -46,23 +60,23 @@ for block in ndrange end ``` """ -struct NDRange{N, StaticBlocks, StaticWorkitems, DynamicBlock, DynamicWorkitems} +struct NDRange{N, Offsets, StaticBlocks, StaticWorkitems, DynamicBlock, DynamicWorkitems} blocks::DynamicBlock workitems::DynamicWorkitems - function NDRange{N, B, W}() where {N, B, W} - new{N, B, W, Nothing, Nothing}(nothing, nothing) + function NDRange{N, O, B, W}() where {N, O, B, W} + new{N, O, B, W, Nothing, Nothing}(nothing, nothing) end - function NDRange{N, B, W}(blocks, workitems) where {N, B, W} - new{N, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems) + function NDRange{N, O, B, W}(blocks, workitems) where {N, O, B, W} + new{N, O, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems) end end -@inline workitems(range::NDRange{N, B, W}) where {N,B,W<:DynamicSize} = range.workitems::CartesianIndices{N} -@inline workitems(range::NDRange{N, B, W}) where {N,B,W<:StaticSize} = CartesianIndices(get(W))::CartesianIndices{N} -@inline blocks(range::NDRange{N, B}) where {N,B<:DynamicSize} = range.blocks::CartesianIndices{N} -@inline blocks(range::NDRange{N, B}) where {N,B<:StaticSize} = CartesianIndices(get(B))::CartesianIndices{N} +@inline workitems(range::NDRange{N, O, B, W}) where {N,O,B,W<:DynamicSize} = range.workitems::CartesianIndices{N} +@inline workitems(range::NDRange{N, O, B, W}) where {N,O,B,W<:StaticSize} = CartesianIndices(get(W))::CartesianIndices{N} +@inline blocks(range::NDRange{N, O, B}) where {N,O,B<:DynamicSize} = range.blocks::CartesianIndices{N} +@inline blocks(range::NDRange{N, O, B}) where {N,O,B<:StaticSize} = CartesianIndices(get(B))::CartesianIndices{N} import Base.iterate @inline iterate(range::NDRange) = iterate(blocks(range)) @@ -70,12 +84,22 @@ import Base.iterate Base.length(range::NDRange) = length(blocks(range)) -@inline function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::CartesianIndex{N}) where N +@inline function expand(ndrange::NDRange{N, nothing}, groupidx::CartesianIndex{N}, idx::CartesianIndex{N}) where N nI = ntuple(Val(N)) do I Base.@_inline_meta stride = size(workitems(ndrange), I) gidx = groupidx.I[I] - (gidx-1)*stride + idx.I[I] + (gidx-1)*stride + idx.I[I] + end + CartesianIndex(nI) +end + +@inline function expand(ndrange::NDRange{N, Offsets}, groupidx::CartesianIndex{N}, idx::CartesianIndex{N}) where {N, Offsets} + nI = ntuple(Val(N)) do I + Base.@_inline_meta + stride = size(workitems(ndrange), I) + gidx = groupidx.I[I] + (gidx-1)*stride + idx.I[I] + Offsets[I] end CartesianIndex(nI) end @@ -116,12 +140,11 @@ needs to perform dynamic bounds-checking. end let workgroupsize = workgroupsize dynamic = Ref(false) - blocks = ntuple(Val(length(ndrange))) do I + blocks = ntuple(Val(length(ndrange))) do I Base.@_inline_meta - dynamic[] |= mod(ndrange[I], workgroupsize[I]) != 0 - return fld1(ndrange[I], workgroupsize[I]) + dynamic[] |= mod(worksize(ndrange[I]), workgroupsize[I]) != 0 + return fld1(worksize(ndrange[I]), workgroupsize[I]) end - return blocks, workgroupsize, dynamic[] ? DynamicCheck() : NoDynamicCheck() end end From d1ed6b7dbfc0f484a7872f367df2e85d511ff252 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 11 Jun 2023 16:58:31 -0400 Subject: [PATCH 2/9] tests --- src/nditeration.jl | 8 ++++++++ test/test.jl | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/nditeration.jl b/src/nditeration.jl index aaa48595f..f7a0de9b0 100644 --- a/src/nditeration.jl +++ b/src/nditeration.jl @@ -64,6 +64,14 @@ struct NDRange{N, Offsets, StaticBlocks, StaticWorkitems, DynamicBlock, DynamicW blocks::DynamicBlock workitems::DynamicWorkitems + function NDRange{N, B, W}() where {N, B, W} + new{N, nothing, B, W, Nothing, Nothing}(nothing, nothing) + end + + function NDRange{N, B, W}(blocks, workitems) where {N, B, W} + new{N, nothing, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems) + end + function NDRange{N, O, B, W}() where {N, O, B, W} new{N, O, B, W, Nothing, Nothing}(nothing, nothing) end diff --git a/test/test.jl b/test/test.jl index 88086342f..7a419fd1b 100644 --- a/test/test.jl +++ b/test/test.jl @@ -215,6 +215,26 @@ end synchronize(Backend()) end +@kernel function index_global_offset!(a) + i, j = @index(Global, NTuple) + n, m = size(a) + @inbounds a[i, j] = i + n * j +end + +@conditional_testset "Offset iteration space $Backend" skip_test begin + a = zeros(7, 9) + loop! = index_global_offset!(Backend(), (2, 2), size(a) .- 4, (2, 2)) + loop!(a) + synchronize(Backend()) + + b = [i + 7 * j for i in 1:7, j in 1:9] + + @test a[3:5, 3:7] == b[3:5, 3:7] + @test a[1:2, :] == zeros(2, 9) + @test a[6:7, :] == zeros(2, 9) + @test a[:, 1:2] == zeros(7, 2) + @test a[:, 8:9] == zeros(7, 2) +end @conditional_testset "return statement" skip_tests begin try From 4557353e27b9a46220c347ab5b1a0cfde15c10dc Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 11 Jun 2023 17:00:38 -0400 Subject: [PATCH 3/9] separate offsets prior to blocks() --- src/nditeration.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nditeration.jl b/src/nditeration.jl index f7a0de9b0..4daa2a3d4 100644 --- a/src/nditeration.jl +++ b/src/nditeration.jl @@ -150,8 +150,8 @@ needs to perform dynamic bounds-checking. dynamic = Ref(false) blocks = ntuple(Val(length(ndrange))) do I Base.@_inline_meta - dynamic[] |= mod(worksize(ndrange[I]), workgroupsize[I]) != 0 - return fld1(worksize(ndrange[I]), workgroupsize[I]) + dynamic[] |= mod(ndrange[I], workgroupsize[I]) != 0 + return fld1(ndrange[I], workgroupsize[I]) end return blocks, workgroupsize, dynamic[] ? DynamicCheck() : NoDynamicCheck() end From 8accb00b07fc38727baad393dc94f0d86802c834 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 11 Jun 2023 17:01:10 -0400 Subject: [PATCH 4/9] fix tests --- test/test.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.jl b/test/test.jl index 7a419fd1b..ba5d8936c 100644 --- a/test/test.jl +++ b/test/test.jl @@ -221,7 +221,7 @@ end @inbounds a[i, j] = i + n * j end -@conditional_testset "Offset iteration space $Backend" skip_test begin +@conditional_testset "Offset iteration space $Backend" skip_tests begin a = zeros(7, 9) loop! = index_global_offset!(Backend(), (2, 2), size(a) .- 4, (2, 2)) loop!(a) From d7c75929ba19535e97b53912f4d918d49d6d4d08 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 11 Jun 2023 17:11:48 -0400 Subject: [PATCH 5/9] handle nothing --- src/macros.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 5078c52de..5bde61a30 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -57,8 +57,8 @@ function __kernel(expr, generate_cpu=true) Core.@__doc__ $name(dev) = $name(dev, $DynamicSize(), $DynamicSize()) $name(dev, size) = $name(dev, $StaticSize(size), $DynamicSize()) $name(dev, size, range) = $name(dev, $StaticSize(size), $StaticSize(range)) - $name(dev, size, range::NTuple{N}, offset::NTuple{N}) where N = - $name(dev, $StaticSize(size), $StaticSize($contiguousrange(range, offset))) + $name(dev, size, range, ::Nothing) = $name(dev, size, range) + $name(dev, size, range, offset) = $name(dev, $StaticSize(size), $StaticSize($contiguousrange(range, offset))) function $name(dev::Dev, sz::S, range::NDRange) where {Dev, S<:$_Size, NDRange<:$_Size} if $isgpu(dev) return $construct(dev, sz, range, $gpu_name) From c5d969cdbb40afe71212c04c609764af4450379b Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 11 Jun 2023 18:27:12 -0400 Subject: [PATCH 6/9] fix test --- test/test.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test.jl b/test/test.jl index ba5d8936c..ffbfb7ce4 100644 --- a/test/test.jl +++ b/test/test.jl @@ -223,8 +223,7 @@ end @conditional_testset "Offset iteration space $Backend" skip_tests begin a = zeros(7, 9) - loop! = index_global_offset!(Backend(), (2, 2), size(a) .- 4, (2, 2)) - loop!(a) + index_global_offset!(Backend(), (2, 2), size(a) .- 4, (2, 2))(a) synchronize(Backend()) b = [i + 7 * j for i in 1:7, j in 1:9] From 4692df9cc2b503af890cda67e5403b460f5a9933 Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Sun, 11 Jun 2023 18:27:28 -0400 Subject: [PATCH 7/9] remove manifest --- Manifest.toml | 244 -------------------------------------------------- 1 file changed, 244 deletions(-) delete mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index 791f408f4..000000000 --- a/Manifest.toml +++ /dev/null @@ -1,244 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.8.0" -manifest_format = "2.0" -project_hash = "9033168e08cf56d36e87b64ae995845922bec33c" - -[[deps.Adapt]] -deps = ["LinearAlgebra", "Requires"] -git-tree-sha1 = "76289dc51920fdc6e0013c872ba9551d54961c24" -uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.6.2" - -[[deps.ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" - -[[deps.Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[deps.Atomix]] -deps = ["UnsafeAtomics"] -git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" -uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" -version = "0.1.0" - -[[deps.Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[deps.CEnum]] -git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.2" - -[[deps.CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "0.5.2+0" - -[[deps.Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[deps.Downloads]] -deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -version = "1.6.0" - -[[deps.FileWatching]] -uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" - -[[deps.InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[deps.JLLWrappers]] -deps = ["Preferences"] -git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.4.1" - -[[deps.LLVM]] -deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "5007c1421563108110bbd57f63d8ad4565808818" -uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "5.2.0" - -[[deps.LLVMExtra_jll]] -deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] -git-tree-sha1 = "1222116d7313cdefecf3d45a2bc1a89c4e7c9217" -uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = "0.0.22+0" - -[[deps.LazyArtifacts]] -deps = ["Artifacts", "Pkg"] -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[[deps.LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" - -[[deps.LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.84.0+0" - -[[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[deps.LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" - -[[deps.Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[deps.LinearAlgebra]] -deps = ["Libdl", "libblastrampoline_jll"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[deps.Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[deps.MacroTools]] -deps = ["Markdown", "Random"] -git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.10" - -[[deps.Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[deps.MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.0+0" - -[[deps.MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.2.1" - -[[deps.NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -version = "1.2.0" - -[[deps.OpenBLAS_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.20+0" - -[[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.8.0" - -[[deps.PrecompileTools]] -deps = ["Preferences"] -git-tree-sha1 = "9673d39decc5feece56ef3940e5dafba15ba0f81" -uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" -version = "1.1.2" - -[[deps.Preferences]] -deps = ["TOML"] -git-tree-sha1 = "7eb1686b4f04b82f96ed7a4ea5890a4f0c7a09f1" -uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.4.0" - -[[deps.Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[deps.Random]] -deps = ["SHA", "Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[deps.Requires]] -deps = ["UUIDs"] -git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.3.0" - -[[deps.SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -version = "0.7.0" - -[[deps.Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[deps.Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[deps.SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[deps.StaticArrays]] -deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] -git-tree-sha1 = "832afbae2a45b4ae7e831f86965469a24d1d8a83" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "1.5.26" - -[[deps.StaticArraysCore]] -git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a" -uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" -version = "1.4.0" - -[[deps.Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[deps.TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.0" - -[[deps.Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.0" - -[[deps.UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[deps.Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[deps.UnsafeAtomics]] -git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" -uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" -version = "0.2.1" - -[[deps.UnsafeAtomicsLLVM]] -deps = ["LLVM", "UnsafeAtomics"] -git-tree-sha1 = "ea37e6066bf194ab78f4e747f5245261f17a7175" -uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" -version = "0.1.2" - -[[deps.Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.12+3" - -[[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] -uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.1.1+0" - -[[deps.nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.48.0+0" - -[[deps.p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" From 13e6de9dddeac9a8435a494427feff6c02ba9a5d Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 12 Jun 2023 09:46:39 -0400 Subject: [PATCH 8/9] remove `StepRange` --- src/nditeration.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/nditeration.jl b/src/nditeration.jl index 4daa2a3d4..d6d4730f3 100644 --- a/src/nditeration.jl +++ b/src/nditeration.jl @@ -35,13 +35,11 @@ end @inline worksize(i::Tuple) = worksize.(i) @inline worksize(i::Int) = i @inline worksize(i::UnitRange) = length(i) -@inline worksize(i::StepRange) = length(i) @inline offsets(i) = offsets.(i) @inline offsets(::NTuple{N, Int}) where N = nothing @inline offsets(::Int) = nothing @inline offsets(i::UnitRange) = i.start - 1 -@inline offsets(i::StepRange) = i.start - 1, i.step """ NDRange @@ -148,7 +146,7 @@ needs to perform dynamic bounds-checking. end let workgroupsize = workgroupsize dynamic = Ref(false) - blocks = ntuple(Val(length(ndrange))) do I + blocks = ntuple(Val(length(ndrange))) do I Base.@_inline_meta dynamic[] |= mod(ndrange[I], workgroupsize[I]) != 0 return fld1(ndrange[I], workgroupsize[I]) From 08c0c02bc10523a30a22bdf942a5ab3627fff6de Mon Sep 17 00:00:00 2001 From: Simone Silvestri <33547697+simone-silvestri@users.noreply.github.com> Date: Mon, 12 Jun 2023 11:32:02 -0400 Subject: [PATCH 9/9] fixed test --- test/test.jl | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/test/test.jl b/test/test.jl index ffbfb7ce4..aaeb6710d 100644 --- a/test/test.jl +++ b/test/test.jl @@ -222,17 +222,20 @@ end end @conditional_testset "Offset iteration space $Backend" skip_tests begin - a = zeros(7, 9) + a = KernelAbstractions.zeros(Backend(), 7, 9) index_global_offset!(Backend(), (2, 2), size(a) .- 4, (2, 2))(a) synchronize(Backend()) - b = [i + 7 * j for i in 1:7, j in 1:9] + b = KernelAbstractions.zeros(CPU(), 7, 9) + b .= a - @test a[3:5, 3:7] == b[3:5, 3:7] - @test a[1:2, :] == zeros(2, 9) - @test a[6:7, :] == zeros(2, 9) - @test a[:, 1:2] == zeros(7, 2) - @test a[:, 8:9] == zeros(7, 2) + c = [i + 7 * j for i in 1:7, j in 1:9] + + @test b[3:5, 3:7] == c[3:5, 3:7] + @test b[1:2, :] == zeros(2, 9) + @test b[6:7, :] == zeros(2, 9) + @test b[:, 1:2] == zeros(7, 2) + @test b[:, 8:9] == zeros(7, 2) end @conditional_testset "return statement" skip_tests begin