Skip to content

Commit b00e9f0

Browse files
authored
Improve copyto! for short heterogeneous tuples (#39035)
There was a complaint on Twitter [1] that Julia is slower than Ruby(!) at allocating short heterogeneous arrays. The code looks something like this: ``` f() = for i in 1:100000; b=[1,2.0]; end; ``` Of course, this benchmark is silly because the arrays are unused, so the entire code should be dropped once we have further improvements to our lifetime analysis, which will change the runtime to 0. However, since we're not quite able to do that yet, it is somewhat unexpected that we should be slower than Ruby here. Admittedly our arrays are tuned for larger sizes, but still. Upon further investigation, it turns out that the issue is that we're allocating extra boxed objects when pulling the fields out of the literal, since we don't have any specialization for `copyto!` from tuples, falling back to the generic `copyto!` method, which is type unstable in the presence of heterogeneous tuples. Since this code is reachable from surface syntax, I think it's worth putting in a couple of specializations to accelerate the array construction, which is what this PR does (for tuples of fewer than 10 elements, to avoid excessive codegen). Since the type-instability issue applies whether the value is used or not, this should still be useful even if we're able to prove that the allocations can be dropped entirely. 
Before: ``` julia> @benchmark f() BenchmarkTools.Trial: memory estimate: 21.36 MiB allocs estimate: 600000 -------------- minimum time: 14.928 ms (0.00% GC) median time: 16.297 ms (0.00% GC) mean time: 16.323 ms (3.84% GC) maximum time: 26.042 ms (0.00% GC) -------------- samples: 307 evals/sample: 1 ``` After: ``` julia> @benchmark f() BenchmarkTools.Trial: memory estimate: 9.16 MiB allocs estimate: 100000 -------------- minimum time: 2.117 ms (0.00% GC) median time: 2.276 ms (0.00% GC) mean time: 2.455 ms (7.62% GC) maximum time: 4.638 ms (28.99% GC) -------------- samples: 2036 evals/sample: 1 ``` This puts us about on par with Ruby on my machine (not entirely surprising, since this is essentially an allocation benchmark at this point and both allocators are written in C).
1 parent 1ad6aed commit b00e9f0

File tree

3 files changed

+58
-2
lines changed

3 files changed

+58
-2
lines changed

base/abstractarray.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -831,12 +831,14 @@ end
831831

832832
## from general iterable to any array
833833

834+
# Error path for copies whose destination cannot hold every source element.
# Marked `@noinline` so callers only carry a call to this function rather than
# the exception-construction code itself.
# Always throws `ArgumentError`; never returns.
@noinline function throw_dest_too_short()
    throw(ArgumentError("destination has fewer elements than required"))
end
836+
834837
function copyto!(dest::AbstractArray, src)
835838
destiter = eachindex(dest)
836839
y = iterate(destiter)
837840
for x in src
838-
y === nothing &&
839-
throw(ArgumentError("destination has fewer elements than required"))
841+
y === nothing && throw_dest_too_short()
840842
dest[y[1]] = x
841843
y = iterate(destiter, y[2])
842844
end

base/ntuple.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,47 @@ end
9191
(t..., fill(val, N-M)...)
9292
end
9393
end
94+
95+
# Specializations for copyto! of various `NTuple`s
96+
# Verify that `dest` has at least as many elements as the tuple `src`.
# Returns `true` when the tuple fits; otherwise raises through
# `throw_dest_too_short()`.
function check_inds_compatible(dest::AbstractArray, src::Tuple)
    if length(dest) < length(src)
        throw_dest_too_short()
    end
    return true
end
99+
100+
# Copy the `N` elements of the tuple `src` into the leading positions of `dest`
# (in `eachindex(dest)` order) and return `dest`.
#
# When the generated branch is used, the copy is emitted as `N` straight-line
# assignments, each indexing `src` with a literal position. The capacity check
# happens once up front, which is what allows the per-element stores to be
# `@inbounds` and the `iterate` results to be destructured unconditionally.
#
# The non-generated branch is the fallback implementation and defers to the
# four-argument `copyto!`.
function _copyto_generated!(dest::AbstractArray, src::NTuple{N, Any}) where N
    if @generated
        # Prologue: one capacity check, then fetch the index iterator.
        body = Expr(:block,
            :(check_inds_compatible(dest, src)),
            :(idxs = eachindex(dest)))
        for n in 1:N
            # The first step starts the iteration; later steps resume it from
            # the `state` bound by the previous step.
            advance = if n == 1
                :(iterate(idxs))
            else
                :(iterate(idxs, state))
            end
            push!(body.args, :((ind, state) = $advance))
            push!(body.args, :(@inbounds dest[ind] = src[$n]))
        end
        push!(body.args, :(return dest))
        body
    else
        isempty(src) && return dest
        return copyto!(dest, firstindex(dest), src, firstindex(src))
    end
end
121+
122+
# Non-homogeneous tuples
123+
# Copy a (possibly heterogeneous) tuple into `dest`.
# Tuples of fewer than 10 elements take the manually optimized
# `_copyto_generated!` path; longer tuples go through the four-argument
# `copyto!`.
function copyto!(dest::AbstractArray, src::Tuple)
    # Long tuples: skip the specialized path.
    length(src) >= 10 &&
        return copyto!(dest, firstindex(dest), src, firstindex(src))
    # Manual optimization for short tuples
    # TODO: Better support for homogeneous tuple tails
    return _copyto_generated!(dest, src)
end
132+
133+
# Specialization for homogeneous tuples
134+
# Copy a homogeneous tuple (all elements of one type `T`) into `dest`.
# An empty tuple leaves `dest` untouched; otherwise the copy is delegated to
# the four-argument `copyto!`, starting at the first index of each container.
function copyto!(dest::AbstractArray, src::Tuple{Vararg{T}} where T)
    if isempty(src)
        return dest
    end
    return copyto!(dest, firstindex(dest), src, firstindex(src))
end

test/abstractarray.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,3 +1258,13 @@ Base.pushfirst!(tpa::TestPushArray{T}, a::T) where T = pushfirst!(tpa.data, a)
12581258
pushfirst!(tpa, 6, 5, 4, 3, 2)
12591259
@test tpa.data == reverse(collect(1:6))
12601260
end
1261+
1262+
@testset "copyto! with tuple" begin
    # Random mix of Float64/Int entries. Only used in cases whose expected
    # outcome is the same whether or not the draw happens to be homogeneous.
    randtype(n) = rand(Bool) ? 1.0 : 2
    # Deterministically heterogeneous (Float64 at odd positions, Int at even).
    # A random 7-element draw comes out all-Float64 or all-Int with probability
    # 1/64, in which case it would dispatch to the homogeneous-tuple method
    # instead of the short heterogeneous path this case is meant to exercise.
    mixedtype(n) = isodd(n) ? 1.0 : 2
    @test copyto!(fill(0.0, 100), ntuple(randtype, 100))[end] != 0.0
    @test copyto!(fill(0.0, 100), ntuple(x->1.0, 100))[end] != 0.0
    @test copyto!(fill(0.0, 100), ntuple(randtype, 50))[end] == 0.0
    @test_throws BoundsError copyto!(fill(0.0, 50), ntuple(randtype, 100))
    @test_throws BoundsError copyto!(fill(0.0, 50), ntuple(x->1.0, 100))
    @test_throws ArgumentError copyto!(fill(0.0, 5), ntuple(mixedtype, 7))
end

0 commit comments

Comments
 (0)