Skip to content

Commit 59f08df

Browse files
authored
LAPACK: annotate size check in lacpy! with @noinline for reduced latency (#55029)
The `@noinline` annotation on the size check appears to reduce latency in a second call with different argument types:
```julia
julia> using LinearAlgebra

julia> A = rand(2,2); B = similar(A);

julia> @time LAPACK.lacpy!(B, A, 'U');
  0.032585 seconds (29.80 k allocations: 1.469 MiB, 99.84% compilation time)

julia> A = rand(Float32,2,2); B = similar(A);

julia> @time LAPACK.lacpy!(B, A, 'U');
  0.026698 seconds (22.80 k allocations: 1.113 MiB, 99.84% compilation time) # v"1.12.0-DEV.810"
  0.024715 seconds (19.88 k allocations: 987.000 KiB, 99.80% compilation time) # Without noinline
  0.017084 seconds (18.52 k allocations: 903.828 KiB, 99.72% compilation time) # This PR (with noinline)
```
1 parent 140248e commit 59f08df

File tree

1 file changed

+6
-11
lines changed

1 file changed

+6
-11
lines changed

stdlib/LinearAlgebra/src/lapack.jl

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7164,19 +7164,11 @@ for (fn, elty) in ((:dlacpy_, :Float64),
71647164
m, n = size(A)
71657165
m1, n1 = size(B)
71667166
if uplo == 'U'
7167-
if n < m
7168-
(m1 < n || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($n,$n)"))
7169-
else
7170-
(m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
7171-
end
7167+
lacpy_size_check((m1, n1), (n < m ? n : m, n))
71727168
elseif uplo == 'L'
7173-
if m < n
7174-
(m1 < m || n1 < m) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$m)"))
7175-
else
7176-
(m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
7177-
end
7169+
lacpy_size_check((m1, n1), (m, m < n ? m : n))
71787170
else
7179-
(m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
7171+
lacpy_size_check((m1, n1), (m, n))
71807172
end
71817173
lda = max(1, stride(A, 2))
71827174
ldb = max(1, stride(B, 2))
@@ -7189,6 +7181,9 @@ for (fn, elty) in ((:dlacpy_, :Float64),
71897181
end
71907182
end
71917183

7184+
# The @noinline annotation reduces compilation latency when lacpy! is first called with new argument types
7185+
# Check that the destination size `(m1, n1)` is at least `(m, n)` in both
# dimensions. Throws a `DimensionMismatch` (with a lazily built message) when it
# is not; evaluates to `false` when the sizes are acceptable.
@noinline function lacpy_size_check((m1, n1), (m, n))
    if m1 < m || n1 < n
        # The lazy string defers interpolation until the message is actually shown.
        msg = lazy"B of size ($m1,$n1) should have at least size ($m,$n)"
        throw(DimensionMismatch(msg))
    end
    return false
end
7186+
71927187
"""
71937188
lacpy!(B, A, uplo) -> B
71947189

0 commit comments

Comments
 (0)