@@ -101,10 +101,24 @@ function _convert(::Type{N}, x::Tf) where {T, f, N <: Normed{T,f}, Tf <: Union{F
101
101
return reinterpret (N, unsafe_trunc (T, yi >> (ex & bits)))
102
102
end
103
103
104
- rem (x:: T , :: Type{T} ) where {T <: Normed } = x
105
- rem (x:: Normed , :: Type{T} ) where {T <: Normed } = reinterpret (T, _unsafe_trunc (rawtype (T), round ((rawone (T)/ rawone (x))* reinterpret (x))))
106
- rem (x:: Real , :: Type{T} ) where {T <: Normed } = reinterpret (T, _unsafe_trunc (rawtype (T), round (rawone (T)* x)))
107
- rem (x:: Float16 , :: Type{T} ) where {T <: Normed } = rem (Float32 (x), T) # avoid overflow
104
+ rem (x:: N , :: Type{N} ) where {N <: Normed } = x
105
+ rem (x:: Normed , :: Type{N} ) where {T, N <: Normed{T} } = reinterpret (N, _unsafe_trunc (T, round ((rawone (N)/ rawone (x))* reinterpret (x))))
106
+ rem (x:: Real , :: Type{N} ) where {T, N <: Normed{T} } = reinterpret (N, _unsafe_trunc (T, round (rawone (N)* x)))
107
+ rem (x:: Float16 , :: Type{N} ) where {N <: Normed } = rem (Float32 (x), N) # avoid overflow
108
+ # Float32 and Float64 cannot exactly represent `rawone(N)` with `f` greater than
109
+ # the number of their significand bits, resulting in rounding errors (issue #150).
110
+ # So, we use another strategy for the large `f`s explained in:
111
+ # https://github.com/JuliaMath/FixedPointNumbers.jl/pull/166#issuecomment-574135643
112
+ function rem (x:: Float32 , :: Type{N} ) where {f, N <: Normed{UInt32,f} }
113
+ f <= 24 && return reinterpret (N, _unsafe_trunc (UInt32, round (rawone (N) * x)))
114
+ r = _unsafe_trunc (UInt32, round (x * @f32 (0x1 p24)))
115
+ reinterpret (N, r << UInt8 (f - 24 ) - unsigned (signed (r) >> 0x18 ))
116
+ end
117
+ function rem (x:: Float64 , :: Type{N} ) where {f, N <: Normed{UInt64,f} }
118
+ f <= 53 && return reinterpret (N, _unsafe_trunc (UInt64, round (rawone (N) * x)))
119
+ r = _unsafe_trunc (UInt64, round (x * 0x1 p53))
120
+ reinterpret (N, r << UInt8 (f - 53 ) - unsigned (signed (r) >> 0x35 ))
121
+ end
108
122
109
123
110
124
function (:: Type{T} )(x:: Normed ) where {T <: AbstractFloat }
0 commit comments