You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
LAPACK: annotate size check in lacpy! with @noinline for reduced latency (#55029)
The `@noinline` annotation on the size check appears to reduce latency
in a second call with different argument types:
```julia
julia> using LinearAlgebra
julia> A = rand(2,2); B = similar(A);
julia> @time LAPACK.lacpy!(B, A, 'U');
0.032585 seconds (29.80 k allocations: 1.469 MiB, 99.84% compilation time)
julia> A = rand(Float32,2,2); B = similar(A);
julia> @time LAPACK.lacpy!(B, A, 'U');
0.026698 seconds (22.80 k allocations: 1.113 MiB, 99.84% compilation time) # v"1.12.0-DEV.810"
0.024715 seconds (19.88 k allocations: 987.000 KiB, 99.80% compilation time) # Without noinline
0.017084 seconds (18.52 k allocations: 903.828 KiB, 99.72% compilation time) # This PR (with noinline)
```
Copy file name to clipboard. Expand all lines: stdlib/LinearAlgebra/src/lapack.jl
+6 -11. Lines changed: 6 additions & 11 deletions
Original file line number
Diff line number
Diff line change
@@ -7164,19 +7164,11 @@ for (fn, elty) in ((:dlacpy_, :Float64),
7164
7164
m, n =size(A)
7165
7165
m1, n1 =size(B)
7166
7166
if uplo =='U'
7167
-
if n < m
7168
-
(m1 < n || n1 < n) &&throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($n,$n)"))
7169
-
else
7170
-
(m1 < m || n1 < n) &&throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
7171
-
end
7167
+
lacpy_size_check((m1, n1), (n < m ? n : m, n))
7172
7168
elseif uplo =='L'
7173
-
if m < n
7174
-
(m1 < m || n1 < m) &&throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$m)"))
7175
-
else
7176
-
(m1 < m || n1 < n) &&throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
7177
-
end
7169
+
lacpy_size_check((m1, n1), (m, m < n ? m : n))
7178
7170
else
7179
-
(m1 < m || n1 < n) &&throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
7171
+
lacpy_size_check((m1,n1), (m, n))
7180
7172
end
7181
7173
lda =max(1, stride(A, 2))
7182
7174
ldb =max(1, stride(B, 2))
@@ -7189,6 +7181,9 @@ for (fn, elty) in ((:dlacpy_, :Float64),
7189
7181
end
7190
7182
end
7191
7183
7184
+
# The noinline annotation reduces latency
7185
+
# Verify that a destination of size `(m1, n1)` can hold at least `(m, n)` entries.
# Evaluates to `false` when the destination is large enough; otherwise throws a
# `DimensionMismatch` whose message is built lazily, so the string is only
# formatted if the error is actually displayed.
# Marked `@noinline` so the check is not inlined into each caller.
@noinline lacpy_size_check((m1, n1), (m, n)) =
    (m1 < m || n1 < n) &&
    throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
0 commit comments