@@ -46,22 +46,56 @@ function Normed{T,f}(x::Normed{T2}) where {T <: Unsigned,T2 <: Unsigned,f}
46
46
end
47
47
N0f16 (x:: N0f8 ) = reinterpret (N0f16, convert (UInt16, 0x0101 * reinterpret (x)))
48
48
49
- (:: Type{U} )(x:: Real ) where {U <: Normed } = _convert (U, rawtype (U), x)
50
-
51
- function _convert (:: Type{U} , :: Type{T} , x) where {U <: Normed ,T}
52
- y = round (widen1 (rawone (U))* x)
53
- (0 <= y) & (y <= typemax (T)) || throw_converterror (U, x)
54
- U (_unsafe_trunc (T, y), 0 )
49
+ (:: Type{U} )(x:: Real ) where {U <: Normed } = _convert (U, x)
50
+
51
+ function _convert (:: Type{U} , x) where {T, f, U <: Normed{T,f} }
52
+ if T == UInt128 # for UInt128, we can't widen
53
+ # the upper limit is not exact
54
+ (0 <= x) & (x <= (typemax (T)/ rawone (U))) || throw_converterror (U, x)
55
+ y = round (rawone (U)* x)
56
+ else
57
+ y = round (widen1 (rawone (U))* x)
58
+ (0 <= y) & (y <= typemax (T)) || throw_converterror (U, x)
59
+ end
60
+ reinterpret (U, _unsafe_trunc (T, y))
55
61
end
56
62
# Prevent overflow (https://discourse.julialang.org/t/saving-greater-than-8-bit-images/6057)
57
- _convert (:: Type{U} , :: Type{T} , x:: Float16 ) where {U <: Normed ,T} =
58
- _convert (U, T, Float32 (x))
59
- _convert (:: Type{U} , :: Type{UInt128} , x:: Float16 ) where {U <: Normed } =
60
- _convert (U, UInt128, Float32 (x))
61
- function _convert (:: Type{U} , :: Type{UInt128} , x) where {U <: Normed }
62
- y = round (rawone (U)* x) # for UInt128, we can't widen
63
- (0 <= y) & (y <= typemax (UInt128)) & (x <= Float64 (typemax (U))) || throw_converterror (U, x)
64
- U (_unsafe_trunc (UInt128, y), 0 )
63
+ function _convert (:: Type{U} , x:: Float16 ) where {T, f, U <: Normed{T,f} }
64
+ if Float16 (typemax (T)/ rawone (U)) > Float32 (typemax (T)/ rawone (U))
65
+ x == Float16 (typemax (T)/ rawone (U)) && return typemax (U)
66
+ end
67
+ return _convert (U, Float32 (x))
68
+ end
69
+ function _convert (:: Type{U} , x:: Tf ) where {T, f, U <: Normed{T,f} , Tf <: Union{Float32, Float64} }
70
+ if T == UInt128 && f == 53
71
+ 0 <= x <= Tf (3.777893186295717e22 ) || throw_converterror (U, x)
72
+ else
73
+ 0 <= x <= Tf ((typemax (T)- rawone (U))/ rawone (U)+ 1 ) || throw_converterror (U, x)
74
+ end
75
+
76
+ significand_bits = Tf == Float64 ? 52 : 23
77
+ if f <= (significand_bits + 1 ) && sizeof (T) * 8 < significand_bits
78
+ return reinterpret (U, unsafe_trunc (T, round (rawone (U) * x)))
79
+ end
80
+ # cf. the implementation of `frexp`
81
+ Tw = f < sizeof (T) * 8 ? T : widen1 (T)
82
+ bits = sizeof (Tw) * 8 - 1
83
+ xu = reinterpret (Tf == Float64 ? UInt64 : UInt32, x)
84
+ k = Int (xu >> significand_bits)
85
+ k == 0 && return zero (U) # neglect subnormal numbers
86
+ significand = xu | (one (xu) << significand_bits)
87
+ yh = unsafe_trunc (Tw, significand) << (bits - significand_bits)
88
+ exponent_bias = Tf == Float64 ? 1023 : 127
89
+ ex = exponent_bias - k + bits - f
90
+ yi = bits >= f ? yh - (yh >> f) : yh
91
+ if ex <= 0
92
+ ex == 0 && return reinterpret (U, unsafe_trunc (T, yi))
93
+ ex != - 1 || signbit (signed (yi)) && return typemax (U)
94
+ return reinterpret (U, unsafe_trunc (T, yi + yi))
95
+ end
96
+ ex > bits && return reinterpret (U, ex == bits + 1 ? one (T) : zero (T))
97
+ yi += one (Tw)<< ((ex - 1 ) & bits) # RoundNearestTiesUp
98
+ return reinterpret (U, unsafe_trunc (T, yi >> (ex & bits)))
65
99
end
66
100
67
101
rem (x:: T , :: Type{T} ) where {T <: Normed } = x
0 commit comments