Skip to content

Commit 0ae5b82

Browse files
authored
Improve accuracy of rem with Normed types (e.g. ::Float32 % N0f32) (Fixes #150) (#166)
1 parent 930f19a commit 0ae5b82

File tree

2 files changed

+22
-4
lines changed

2 files changed

+22
-4
lines changed

src/normed.jl

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,24 @@ function _convert(::Type{N}, x::Tf) where {T, f, N <: Normed{T,f}, Tf <: Union{F
101101
return reinterpret(N, unsafe_trunc(T, yi >> (ex & bits)))
102102
end
103103

104-
rem(x::T, ::Type{T}) where {T <: Normed} = x
105-
rem(x::Normed, ::Type{T}) where {T <: Normed} = reinterpret(T, _unsafe_trunc(rawtype(T), round((rawone(T)/rawone(x))*reinterpret(x))))
106-
rem(x::Real, ::Type{T}) where {T <: Normed} = reinterpret(T, _unsafe_trunc(rawtype(T), round(rawone(T)*x)))
107-
rem(x::Float16, ::Type{T}) where {T <: Normed} = rem(Float32(x), T) # avoid overflow
104+
rem(x::N, ::Type{N}) where {N <: Normed} = x
105+
rem(x::Normed, ::Type{N}) where {T, N <: Normed{T}} = reinterpret(N, _unsafe_trunc(T, round((rawone(N)/rawone(x))*reinterpret(x))))
106+
rem(x::Real, ::Type{N}) where {T, N <: Normed{T}} = reinterpret(N, _unsafe_trunc(T, round(rawone(N)*x)))
107+
rem(x::Float16, ::Type{N}) where {N <: Normed} = rem(Float32(x), N) # avoid overflow
108+
# Float32 and Float64 cannot exactly represent `rawone(N)` with `f` greater than
109+
# the number of their significand bits, resulting in rounding errors (issue #150).
110+
# So, we use another strategy for the large `f`s explained in:
111+
# https://github.com/JuliaMath/FixedPointNumbers.jl/pull/166#issuecomment-574135643
112+
function rem(x::Float32, ::Type{N}) where {f, N <: Normed{UInt32,f}}
113+
f <= 24 && return reinterpret(N, _unsafe_trunc(UInt32, round(rawone(N) * x)))
114+
r = _unsafe_trunc(UInt32, round(x * @f32(0x1p24)))
115+
reinterpret(N, r << UInt8(f - 24) - unsigned(signed(r) >> 0x18))
116+
end
117+
function rem(x::Float64, ::Type{N}) where {f, N <: Normed{UInt64,f}}
118+
f <= 53 && return reinterpret(N, _unsafe_trunc(UInt64, round(rawone(N) * x)))
119+
r = _unsafe_trunc(UInt64, round(x * 0x1p53))
120+
reinterpret(N, r << UInt8(f - 53) - unsigned(signed(r) >> 0x35))
121+
end
108122

109123

110124
function (::Type{T})(x::Normed) where {T <: AbstractFloat}

test/normed.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,10 @@ end
200200

201201
@test 1 % N0f8 == 1
202202
@test 2 % N0f8 == N0f8(0.996)
203+
204+
# issue #150
205+
@test all(f -> 1.0f0 % Normed{UInt32,f} == oneunit(Normed{UInt32,f}), 1:32)
206+
@test all(f -> 1.0e0 % Normed{UInt64,f} == oneunit(Normed{UInt64,f}), 1:64)
203207
end
204208

205209
@testset "bitwise" begin

0 commit comments

Comments
 (0)