Fix `% Normed{UInt32}` on ARM and improve `NaN % FixedPoint` (#223)

kimikage · web-flow · commit a14230896695 · 2020-08-29T10:07:05.000+09:00
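This reduces the environment- and optimization-dependent instability of the results of `%` (`rem`) when a `NaN` is converted to a `FixedPoint` type.
This also adds tests for `NaN`/`Inf` inputs to the constructors and `clamp`, and for `NaN % F`.
This clarifies the behavior regarding `NaN` and `Inf`, which had not been explicitly defined.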
diff --git a/src/fixed.jl b/src/fixed.jl
@@ -103,10 +103,18 @@ function rem(x::Fixed, ::Type{F}) where {T, f, F <: Fixed{T,f}}
 end
 rem(x::Integer, ::Type{F}) where {T, f, F <: Fixed{T,f}} = F(_unsafe_trunc(T, x) << f, 0)
 function rem(x::Real, ::Type{F}) where {T, f, F <: Fixed{T,f}}
-    y = _unsafe_trunc(promote_type(Int64, T), round(x * @exp2(f)))
+    if bitwidth(T) < 32
+        Ti = T
+    else
+        isfinite(x) || return zero(F)
+        Ti = promote_type(Int64, T)
+    end
+    Tf = floattype(F)
+    y = _unsafe_trunc(Ti, round(x * Tf(@exp2(f))))
     reinterpret(F, _unsafe_trunc(T, y))
 end
 function rem(x::BigFloat, ::Type{F}) where {T, f, F <: Fixed{T,f}}
+    isfinite(x) || return zero(F)
     reinterpret(F, _unsafe_trunc(T, round(x * @exp2(f))))
 end
 
diff --git a/src/normed.jl b/src/normed.jl
@@ -110,18 +110,23 @@ end
 
 rem(x::N, ::Type{N}) where {N <: Normed} = x
 rem(x::Normed, ::Type{N}) where {T, N <: Normed{T}} = reinterpret(N, _unsafe_trunc(T, round((rawone(N)/rawone(x))*reinterpret(x))))
-rem(x::Real, ::Type{N}) where {T, N <: Normed{T}} = reinterpret(N, _unsafe_trunc(T, round(rawone(N)*x)))
+function rem(x::Real, ::Type{N}) where {T, N <: Normed{T}}
+    bitwidth(T) < 32 || isfinite(x) || return zero(N)
+    reinterpret(N, _unsafe_trunc(T, round(rawone(N) * x)))
+end
 rem(x::Float16, ::Type{N}) where {N <: Normed} = rem(Float32(x), N)  # avoid overflow
 # Float32 and Float64 cannot exactly represent `rawone(N)` with `f` greater than
 # the number of their significand bits, resulting in rounding errors (issue #150).
 # So, we use another strategy for the large `f`s explained in:
 # https://github.com/JuliaMath/FixedPointNumbers.jl/pull/166#issuecomment-574135643
 function rem(x::Float32, ::Type{N}) where {f, N <: Normed{UInt32,f}}
+    isfinite(x) || return zero(N)
     f <= 24 && return reinterpret(N, _unsafe_trunc(UInt32, round(rawone(N) * x)))
     r = _unsafe_trunc(UInt32, round(x * @f32(0x1p24)))
     reinterpret(N, r << UInt8(f - 24) - unsigned(signed(r) >> 0x18))
 end
 function rem(x::Float64, ::Type{N}) where {f, N <: Normed{UInt64,f}}
+    isfinite(x) || return zero(N)
     f <= 53 && return reinterpret(N, _unsafe_trunc(UInt64, round(rawone(N) * x)))
     r = _unsafe_trunc(UInt64, round(x * 0x1p53))
     reinterpret(N, r << UInt8(f - 53) - unsigned(signed(r) >> 0x35))
diff --git a/src/utilities.jl b/src/utilities.jl
@@ -45,7 +45,11 @@ _unsafe_trunc(::Type{T}, x::BigFloat) where {T <: Integer} = trunc(BigInt, x) %
 if !signbit(signed(unsafe_trunc(UInt, -12.345)))
     # a workaround for ARM (issue #134)
     function _unsafe_trunc(::Type{T}, x::AbstractFloat) where {T <: Integer}
-        unsafe_trunc(T, unsafe_trunc(signedtype(T), x))
+        if T === UInt32
+            copysign(unsafe_trunc(T, abs(x)), x)
+        else
+            unsafe_trunc(T, unsafe_trunc(signedtype(T), x))
+        end
     end
 end
 
diff --git a/test/fixed.jl b/test/fixed.jl
@@ -219,6 +219,14 @@ end
     end
 end
 
+@testset "conversions from float" begin
+    @testset "$F(nan)" for F in target(Fixed; ex = :thin)
+        @test_throws ArgumentError F(Inf)
+        @test_throws ArgumentError F(-Inf32)
+        @test_throws ArgumentError F(NaN)
+    end
+end
+
 @testset "conversions to float" begin
     for T in (Float16, Float32, Float64)
         @test isa(convert(T, Q0f7(0.3)), T)
@@ -283,6 +291,11 @@ end
 
     @test -1 % Q0f7 === Q0f7(-1)
     @test -2 % Q0f7 === Q0f7(0)
+
+    # TODO: avoid undefined behavior
+    @testset "nan % $F" for F in target(Fixed, :i8, :i16, :i32, :i64; ex = :thin)
+        @test NaN % F === NaN32 % F === NaN16 % F === zero(F)
+    end
 end
 
 @testset "neg" begin
@@ -497,6 +510,12 @@ end
     @test clamp(0.5,     Q0f7) === 0.5Q0f7
     @test clamp(-1.5f0,  Q0f7) === -1.0Q0f7
     @test clamp(1.5Q1f6, Q0f7) === 0.992Q0f7
+
+    @testset "clamp(nan, $F)" for F in target(Fixed; ex = :thin)
+        @test clamp( Inf, F) === clamp( Inf32, F) === typemax(F)
+        @test clamp(-Inf, F) === clamp(-Inf32, F) === typemin(F)
+        @test clamp( NaN, F) === clamp( NaN32, F) === zero(F)
+    end
 end
 
 @testset "sign-related functions" begin
diff --git a/test/normed.jl b/test/normed.jl
@@ -184,7 +184,7 @@ end
     end
 end
 
-@testset "conversion from float" begin
+@testset "conversions from float" begin
     # issue 102
     for Tf in (Float16, Float32, Float64)
         @testset "$N(::$Tf)" for N in target(Normed)
@@ -213,6 +213,12 @@ end
     @test N0f32(Float32(0x0.7FFFFFp-32)) == zero(N0f32)
     @test N0f32(Float32(0x0.800000p-32)) <= eps(N0f32) # should be zero in RoundNearest mode
     @test N0f32(Float32(0x0.800001p-32)) == eps(N0f32)
+
+    @testset "$N(nan)" for N in target(Normed; ex = :thin)
+        @test_throws ArgumentError N(Inf)
+        @test_throws ArgumentError N(-Inf32)
+        @test_throws ArgumentError N(NaN)
+    end
 end
 
 @testset "conversions to float" begin
@@ -275,9 +281,14 @@ end
     @test all(f -> 1.0f0 % Normed{UInt32,f} == oneunit(Normed{UInt32,f}), 1:32)
     @test all(f -> 1.0e0 % Normed{UInt64,f} == oneunit(Normed{UInt64,f}), 1:64)
 
-    # issu #211
+    # issue #211
     @test big"1.2" % N0f8 === 0.196N0f8
     @test reinterpret(BigFloat(0x0_01234567_89abcdef) % N63f1) === 0x01234567_89abcdef
+
+    # TODO: avoid undefined behavior
+    @testset "nan % $N" for N in target(Normed, :i8, :i16, :i32, :i64; ex = :thin)
+        @test NaN % N === NaN32 % N ===  NaN16 % N == zero(N)
+    end
 end
 
 @testset "arithmetic" begin
@@ -506,6 +517,12 @@ end
     @test clamp(0.5,     N0f8) === 0.5N0f8
     @test clamp(-1.0f0,  N0f8) === 0.0N0f8
     @test clamp(2.0N1f7, N0f8) === 1.0N0f8
+
+    @testset "clamp(nan, $N)" for N in target(Normed; ex = :thin)
+        @test clamp( Inf, N) === clamp( Inf32, N) === typemax(N)
+        @test clamp(-Inf, N) === clamp(-Inf32, N) === typemin(N)
+        @test clamp( NaN, N) === clamp( NaN32, N) === zero(N)
+    end
 end
 
 @testset "sign-related functions" begin