Skip to content

Commit 3669e44

Browse files
committed
Speed up RGB <--> XYZ conversions again
1 parent 9bf4ed7 commit 3669e44

File tree

4 files changed

+52
-18
lines changed

4 files changed

+52
-18
lines changed

src/conversions.jl

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,8 @@ correct_gamut(c::CV) where {CV<:TransparentRGB} =
7777
CV(clamp01(red(c)), clamp01(green(c)), clamp01(blue(c)), clamp01(alpha(c))) # for `hex`
7878

7979
function srgb_compand(v::Fractional)
80-
# the following is an optimization technique for `1.055v^(1/2.4) - 0.055`.
81-
# x^y ≈ exp(y*log(x)) ≈ exp2(y*log2(y)); the middle form is faster
82-
v <= 0.0031308 ? 12.92v : 1.055 * exp(1/2.4 * log(v)) - 0.055
80+
# `pow5_12` is an optimized function to get `v^(1/2.4)`
81+
v <= 0.0031308 ? 12.92v : 1.055 * pow5_12(v) - 0.055
8382
end
8483

8584
# Convert between `AbstractRGB` color types by handing the red, green and blue
# components of `c` to the constructor of the target type `CV`.
function cnvt(::Type{CV}, c::AbstractRGB) where {CV<:AbstractRGB}
    return CV(red(c), green(c), blue(c))
end
@@ -270,17 +269,30 @@ cnvt(::Type{HSI{T}}, c::Color3) where {T} = cnvt(HSI{T}, convert(RGB{T}, c))
270269
# -----------------
271270

272271
function invert_srgb_compand(v::Fractional)
273-
v <= 0.04045 && return v/12.92
274-
# the following is an optimization technique for `((v+0.055) /1.055)^2.4`.
275-
# see also: srgb_compand(v::Fractional)
276-
x = (v + 0.055) / 1.055
277-
return x^2 * exp(0.4 * log(x)) # 2.4 == 2 + 0.4
272+
# `pow12_5` is an optimized function to get `x^2.4`
273+
v <= 0.04045 ? 1/12.92 * v : pow12_5(1/1.055 * (v + 0.055))
278274
end
279275

280-
const invert_srgb_compand_n0f8 = [invert_srgb_compand(v/255) for v = 0:255] # LUT
276+
# lookup table for `N0f8` (the extra two elements are for `Float32` splines)
277+
# 258-entry table: index `v + 1` holds `invert_srgb_compand(v / 255.0)` for `v` in 0:257.
const invert_srgb_compand_n0f8 = map(v -> invert_srgb_compand(v / 255.0), 0:257)
281278

282279
function invert_srgb_compand(v::N0f8)
283-
invert_srgb_compand_n0f8[reinterpret(UInt8, v) + 1]
280+
@inbounds invert_srgb_compand_n0f8[reinterpret(UInt8, v) + 1]
281+
end
282+
283+
function invert_srgb_compand(v::Float32)
284+
i = unsafe_trunc(Int32, v * 255)
285+
(i < 13 || i > 255) && return invert_srgb_compand(Float64(v))
286+
@inbounds y = view(invert_srgb_compand_n0f8, i:i+3)
287+
dv = v * 255.0 - i
288+
dv == 0.0 && @inbounds return y[2]
289+
if v < 0.38857287f0
290+
return @fastmath(y[2]+0.5*dv*((-2/3*y[1]- y[2])+(2y[3]-1/3*y[4])+
291+
dv*(( y[1]-2y[2])+ y[3]-
292+
dv*(( 1/3*y[1]- y[2])+( y[3]-1/3*y[4]) ))))
293+
else
294+
return @fastmath(y[2]+0.5*dv*((4y[3]-3y[2])-y[4]+dv*((y[4]-y[3])+(y[2]-y[3]))))
295+
end
284296
end
285297

286298
function cnvt(::Type{XYZ{T}}, c::AbstractRGB) where T

src/utilities.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,19 @@ end
1414
# mod6 supports the input `x` in [-2^28, 2^29]
1515
# Division-free `x mod 6`, returned as type `T` via `unsafe_trunc`.
# A quotient estimate is formed by a widened multiply of `x` with 0x2aaaaaaa
# (roughly 2^32 / 6), adding the bias 0x20000000 and shifting right by 32 bits;
# six times that estimate is then subtracted from `x`.
# NOTE(review): the valid input range is the one stated for this function
# elsewhere in the file; it is not checked here.
mod6(::Type{T}, x::Int32) where T = unsafe_trunc(T, x - 6 * ((widemul(x, 0x2aaaaaaa) + Int64(0x20000000)) >> 0x20))
1616

17+
# TODO: move `pow7` from "src/differences.jl" to here
18+
19+
# Compute `x^(3/4)` as `sqrt(x) * sqrt(sqrt(x))`, i.e. x^(1/2) * x^(1/4).
# Both square roots are evaluated under `@fastmath`; the product is not.
function pow3_4(x)
    root = @fastmath(sqrt(x))
    return root * @fastmath(sqrt(root))
end
20+
21+
# `pow5_12` is called from `srgb_compand`.
22+
# `cbrt` generates a function call, so there is little benefit of `@fastmath`.
23+
# Compute `x^(5/12)` by dividing `x^(3/4)` by the cube root of `x`,
# since 5/12 == 3/4 - 1/3.
function pow5_12(x)
    three_quarters = pow3_4(x)
    return three_quarters / cbrt(x)
end
24+
25+
# `pow12_5` is called from `invert_srgb_compand`.
26+
# x^y ≈ exp(y*log(x)) ≈ exp2(y*log2(x)); the middle form is faster
27+
# Compute `x^2.4` (12/5 == 2 + 0.4) as the product of `x^2` and `x^0.4`,
# where the fractional power is evaluated as `exp(0.4 * log(x))`.
# Marked `@noinline`, so callers get a real function call.
@noinline function pow12_5(x)
    squared = x^2
    fractional = exp(0.4 * log(x))
    return squared * fractional
end
28+
29+
1730
# Linear interpolation in [a, b] where x is in [0,1],
1831
# or coerced to be if not.
1932
function lerp(x, a, b)

test/conversion.jl

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,7 @@ using ColorTypes: eltype_default, parametric3
2020
# srgb_compand / invert_srgb_compand
2121
@test Colors.srgb_compand(0.5) ≈ 0.7353569830524494 atol=eps()
2222
@test Colors.invert_srgb_compand(0.7353569830524494) ≈ 0.5 atol=eps()
23-
# issue #351
24-
l_pow_x_y() = for i=1:1000; (i/1000)^(1/2.4) end
25-
l_exp_y_log_x() = for i=1:1000; exp(1/2.4*log(i/1000)) end
26-
l_pow_x_y(); t_pow_x_y = @elapsed l_pow_x_y()
27-
l_exp_y_log_x(); t_exp_y_log_x = @elapsed l_exp_y_log_x()
28-
if t_exp_y_log_x > t_pow_x_y
29-
@warn "Optimization in `[invert_]srgb_compand()` may have the opposite effect."
30-
end
23+
@test Colors.invert_srgb_compand(0.735357f0) ≈ 0.5f0 atol=eps(Float32)
3124

3225
fractional_types = (RGB, BGR, XRGB, RGBX) # types that support Fractional
3326

test/utilities.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,22 @@
11
using Colors, FixedPointNumbers, Test, InteractiveUtils
22

33
@testset "Utilities" begin
4+
# issue #351
5+
xs = max.(rand(1000), 1e-4)
6+
@noinline l_pow_x_y() = for x in xs; x^2.4 end
7+
@noinline l_pow12_5() = for x in xs; Colors.pow12_5(x) end
8+
l_pow_x_y(); t_pow_x_y = @elapsed l_pow_x_y()
9+
l_pow12_5(); t_pow12_5 = @elapsed l_pow12_5()
10+
if t_pow12_5 > t_pow_x_y
11+
@warn "Optimization technique in `pow12_5` may have the opposite effect."
12+
end
13+
@noinline l_exp_y_log_x() = for x in xs; exp(1/2.4 * log(x)) end
14+
@noinline l_pow5_12() = for x in xs; Colors.pow5_12(x) end
15+
l_exp_y_log_x(); t_exp_y_log_x = @elapsed l_exp_y_log_x()
16+
l_pow5_12(); t_pow5_12 = @elapsed l_pow5_12()
17+
if t_pow5_12 > t_exp_y_log_x
18+
@warn "Optimization technique in `pow5_12` may have the opposite effect."
19+
end
420

521
@testset "hex" begin
622
base_hex = @test_logs (:warn, r"Base\.hex\(c\) has been moved") Base.hex(RGB(1,0.5,0))

0 commit comments

Comments
 (0)