Skip to content

Commit a142308

Browse files
authored
Fix `% Normed{UInt32}` on ARM and improve `NaN % FixedPoint` (#223)
This reduces the instability of the results for `NaN`, which previously depended on the environment and the optimization level. It also adds tests for `NaN`/`Inf` and clarifies the behavior for `NaN` and `Inf`, which had not been explicitly defined.
1 parent 9ca0d9d commit a142308

File tree

5 files changed

+58
-5
lines changed

5 files changed

+58
-5
lines changed

src/fixed.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,18 @@ function rem(x::Fixed, ::Type{F}) where {T, f, F <: Fixed{T,f}}
103103
end
104104
rem(x::Integer, ::Type{F}) where {T, f, F <: Fixed{T,f}} = F(_unsafe_trunc(T, x) << f, 0)
105105
function rem(x::Real, ::Type{F}) where {T, f, F <: Fixed{T,f}}
106-
y = _unsafe_trunc(promote_type(Int64, T), round(x * @exp2(f)))
106+
if bitwidth(T) < 32
107+
Ti = T
108+
else
109+
isfinite(x) || return zero(F)
110+
Ti = promote_type(Int64, T)
111+
end
112+
Tf = floattype(F)
113+
y = _unsafe_trunc(Ti, round(x * Tf(@exp2(f))))
107114
reinterpret(F, _unsafe_trunc(T, y))
108115
end
109116
function rem(x::BigFloat, ::Type{F}) where {T, f, F <: Fixed{T,f}}
117+
isfinite(x) || return zero(F)
110118
reinterpret(F, _unsafe_trunc(T, round(x * @exp2(f))))
111119
end
112120

src/normed.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,18 +110,23 @@ end
110110

111111
rem(x::N, ::Type{N}) where {N <: Normed} = x
112112
rem(x::Normed, ::Type{N}) where {T, N <: Normed{T}} = reinterpret(N, _unsafe_trunc(T, round((rawone(N)/rawone(x))*reinterpret(x))))
113-
rem(x::Real, ::Type{N}) where {T, N <: Normed{T}} = reinterpret(N, _unsafe_trunc(T, round(rawone(N)*x)))
113+
function rem(x::Real, ::Type{N}) where {T, N <: Normed{T}}
114+
bitwidth(T) < 32 || isfinite(x) || return zero(N)
115+
reinterpret(N, _unsafe_trunc(T, round(rawone(N) * x)))
116+
end
114117
rem(x::Float16, ::Type{N}) where {N <: Normed} = rem(Float32(x), N) # avoid overflow
115118
# Float32 and Float64 cannot exactly represent `rawone(N)` with `f` greater than
116119
# the number of their significand bits, resulting in rounding errors (issue #150).
117120
# So, we use another strategy for the large `f`s explained in:
118121
# https://github.com/JuliaMath/FixedPointNumbers.jl/pull/166#issuecomment-574135643
119122
function rem(x::Float32, ::Type{N}) where {f, N <: Normed{UInt32,f}}
123+
isfinite(x) || return zero(N)
120124
f <= 24 && return reinterpret(N, _unsafe_trunc(UInt32, round(rawone(N) * x)))
121125
r = _unsafe_trunc(UInt32, round(x * @f32(0x1p24)))
122126
reinterpret(N, r << UInt8(f - 24) - unsigned(signed(r) >> 0x18))
123127
end
124128
function rem(x::Float64, ::Type{N}) where {f, N <: Normed{UInt64,f}}
129+
isfinite(x) || return zero(N)
125130
f <= 53 && return reinterpret(N, _unsafe_trunc(UInt64, round(rawone(N) * x)))
126131
r = _unsafe_trunc(UInt64, round(x * 0x1p53))
127132
reinterpret(N, r << UInt8(f - 53) - unsigned(signed(r) >> 0x35))

src/utilities.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,11 @@ _unsafe_trunc(::Type{T}, x::BigFloat) where {T <: Integer} = trunc(BigInt, x) %
4545
if !signbit(signed(unsafe_trunc(UInt, -12.345)))
4646
# a workaround for ARM (issue #134)
4747
function _unsafe_trunc(::Type{T}, x::AbstractFloat) where {T <: Integer}
48-
unsafe_trunc(T, unsafe_trunc(signedtype(T), x))
48+
if T === UInt32
49+
copysign(unsafe_trunc(T, abs(x)), x)
50+
else
51+
unsafe_trunc(T, unsafe_trunc(signedtype(T), x))
52+
end
4953
end
5054
end
5155

test/fixed.jl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,14 @@ end
219219
end
220220
end
221221

222+
@testset "conversions from float" begin
223+
@testset "$F(nan)" for F in target(Fixed; ex = :thin)
224+
@test_throws ArgumentError F(Inf)
225+
@test_throws ArgumentError F(-Inf32)
226+
@test_throws ArgumentError F(NaN)
227+
end
228+
end
229+
222230
@testset "conversions to float" begin
223231
for T in (Float16, Float32, Float64)
224232
@test isa(convert(T, Q0f7(0.3)), T)
@@ -283,6 +291,11 @@ end
283291

284292
@test -1 % Q0f7 === Q0f7(-1)
285293
@test -2 % Q0f7 === Q0f7(0)
294+
295+
# TODO: avoid undefined behavior
296+
@testset "nan % $F" for F in target(Fixed, :i8, :i16, :i32, :i64; ex = :thin)
297+
@test NaN % F === NaN32 % F === NaN16 % F === zero(F)
298+
end
286299
end
287300

288301
@testset "neg" begin
@@ -497,6 +510,12 @@ end
497510
@test clamp(0.5, Q0f7) === 0.5Q0f7
498511
@test clamp(-1.5f0, Q0f7) === -1.0Q0f7
499512
@test clamp(1.5Q1f6, Q0f7) === 0.992Q0f7
513+
514+
@testset "clamp(nan, $F)" for F in target(Fixed; ex = :thin)
515+
@test clamp( Inf, F) === clamp( Inf32, F) === typemax(F)
516+
@test clamp(-Inf, F) === clamp(-Inf32, F) === typemin(F)
517+
@test clamp( NaN, F) === clamp( NaN32, F) === zero(F)
518+
end
500519
end
501520

502521
@testset "sign-related functions" begin

test/normed.jl

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ end
184184
end
185185
end
186186

187-
@testset "conversion from float" begin
187+
@testset "conversions from float" begin
188188
# issue 102
189189
for Tf in (Float16, Float32, Float64)
190190
@testset "$N(::$Tf)" for N in target(Normed)
@@ -213,6 +213,12 @@ end
213213
@test N0f32(Float32(0x0.7FFFFFp-32)) == zero(N0f32)
214214
@test N0f32(Float32(0x0.800000p-32)) <= eps(N0f32) # should be zero in RoundNearest mode
215215
@test N0f32(Float32(0x0.800001p-32)) == eps(N0f32)
216+
217+
@testset "$N(nan)" for N in target(Normed; ex = :thin)
218+
@test_throws ArgumentError N(Inf)
219+
@test_throws ArgumentError N(-Inf32)
220+
@test_throws ArgumentError N(NaN)
221+
end
216222
end
217223

218224
@testset "conversions to float" begin
@@ -275,9 +281,14 @@ end
275281
@test all(f -> 1.0f0 % Normed{UInt32,f} == oneunit(Normed{UInt32,f}), 1:32)
276282
@test all(f -> 1.0e0 % Normed{UInt64,f} == oneunit(Normed{UInt64,f}), 1:64)
277283

278-
# issu #211
284+
# issue #211
279285
@test big"1.2" % N0f8 === 0.196N0f8
280286
@test reinterpret(BigFloat(0x0_01234567_89abcdef) % N63f1) === 0x01234567_89abcdef
287+
288+
# TODO: avoid undefined behavior
289+
@testset "nan % $N" for N in target(Normed, :i8, :i16, :i32, :i64; ex = :thin)
290+
@test NaN % N === NaN32 % N === NaN16 % N == zero(N)
291+
end
281292
end
282293

283294
@testset "arithmetic" begin
@@ -506,6 +517,12 @@ end
506517
@test clamp(0.5, N0f8) === 0.5N0f8
507518
@test clamp(-1.0f0, N0f8) === 0.0N0f8
508519
@test clamp(2.0N1f7, N0f8) === 1.0N0f8
520+
521+
@testset "clamp(nan, $N)" for N in target(Normed; ex = :thin)
522+
@test clamp( Inf, N) === clamp( Inf32, N) === typemax(N)
523+
@test clamp(-Inf, N) === clamp(-Inf32, N) === typemin(N)
524+
@test clamp( NaN, N) === clamp( NaN32, N) === zero(N)
525+
end
509526
end
510527

511528
@testset "sign-related functions" begin

0 commit comments

Comments
 (0)