@@ -41,27 +41,63 @@ rawone(v) = reinterpret(one(v))
41
41
function Normed {T,f} (x:: Normed{T2} ) where {T <: Unsigned ,T2 <: Unsigned ,f}
42
42
U = Normed{T,f}
43
43
y = round ((rawone (U)/ rawone (x))* reinterpret (x))
44
- ( 0 <= y) & (y <= typemax (T) ) || throw_converterror (U, x)
44
+ 0 <= y <= typemax (T) || throw_converterror (U, x)
45
45
reinterpret (U, _unsafe_trunc (T, y))
46
46
end
47
47
N0f16 (x:: N0f8 ) = reinterpret (N0f16, convert (UInt16, 0x0101 * reinterpret (x)))
48
48
49
- (:: Type{U} )(x:: Real ) where {U <: Normed } = _convert (U, rawtype (U), x)
49
+ (:: Type{U} )(x:: Real ) where {U <: Normed } = _convert (U, x)
50
50
51
- function _convert (:: Type{U} , :: Type{T} , x) where {U <: Normed ,T}
52
- y = round (widen1 (rawone (U))* x)
53
- (0 <= y) & (y <= typemax (T)) || throw_converterror (U, x)
54
- U (_unsafe_trunc (T, y), 0 )
51
+ function _convert (:: Type{U} , x) where {T, f, U <: Normed{T,f} }
52
+ if T == UInt128 # for UInt128, we can't widen
53
+ # the following range is not exact.
54
+ 0 <= x <= ((typemax (T)- rawone (U))/ rawone (U)+ 1 ) || throw_converterror (U, x)
55
+ y = round (rawone (U)* x)
56
+ else
57
+ y = round (widen1 (rawone (U))* x)
58
+ 0 <= y <= typemax (T) || throw_converterror (U, x)
59
+ end
60
+ reinterpret (U, _unsafe_trunc (T, y))
55
61
end
56
62
# Prevent overflow (https://discourse.julialang.org/t/saving-greater-than-8-bit-images/6057)
57
- _convert (:: Type{U} , :: Type{T} , x:: Float16 ) where {U <: Normed ,T} =
58
- _convert (U, T, Float32 (x))
59
- _convert (:: Type{U} , :: Type{UInt128} , x:: Float16 ) where {U <: Normed } =
60
- _convert (U, UInt128, Float32 (x))
61
- function _convert (:: Type{U} , :: Type{UInt128} , x) where {U <: Normed }
62
- y = round (rawone (U)* x) # for UInt128, we can't widen
63
- (0 <= y) & (y <= typemax (UInt128)) & (x <= Float64 (typemax (U))) || throw_converterror (U, x)
64
- U (_unsafe_trunc (UInt128, y), 0 )
63
+ function _convert (:: Type{U} , x:: Float16 ) where {T, f, U <: Normed{T,f} }
64
+ if Float16 (typemax (T)/ rawone (U)) > Float32 (typemax (T)/ rawone (U))
65
+ x == Float16 (typemax (T)/ rawone (U)) && return typemax (U)
66
+ end
67
+ return _convert (U, Float32 (x))
68
+ end
69
+ function _convert (:: Type{U} , x:: Tf ) where {T, f, U <: Normed{T,f} , Tf <: Union{Float32, Float64} }
70
+ if T == UInt128 && f == 53
71
+ 0 <= x <= Tf (3.777893186295717e22 ) || throw_converterror (U, x)
72
+ else
73
+ 0 <= x <= Tf ((typemax (T)- rawone (U))/ rawone (U)+ 1 ) || throw_converterror (U, x)
74
+ end
75
+
76
+ significand_bits = Tf == Float64 ? 52 : 23
77
+ if f <= (significand_bits + 1 ) && sizeof (T) * 8 < significand_bits
78
+ return reinterpret (U, unsafe_trunc (T, round (rawone (U) * x)))
79
+ end
80
+ # cf. the implementation of `frexp`
81
+ Tw = f < sizeof (T) * 8 ? T : widen1 (T)
82
+ bits = sizeof (Tw) * 8 - 1
83
+ xu = reinterpret (Tf == Float64 ? UInt64 : UInt32, x)
84
+ k = Int (xu >> significand_bits)
85
+ k == 0 && return zero (U) # neglect subnormal numbers
86
+ significand = xu | (one (xu) << significand_bits)
87
+ yh = unsafe_trunc (Tw, significand) << (bits - significand_bits)
88
+ exponent_bias = Tf == Float64 ? 1023 : 127
89
+ ex = exponent_bias - k + bits - f
90
+ if ex <= 0
91
+ yi = yh - (yh >> f)
92
+ ex == 0 && return reinterpret (U, unsafe_trunc (T, yi))
93
+ if ex == - 1
94
+ (yi & (one (Tw) << bits)) != 0 && return typemax (U)
95
+ return reinterpret (U, unsafe_trunc (T, yi + yi))
96
+ end
97
+ return typemax (U)
98
+ end
99
+ yi = bits >= f ? yh - (yh >> f) + (one (Tw)<< (ex - 1 )) : yh
100
+ return reinterpret (U, unsafe_trunc (T, yi >> ex))
65
101
end
66
102
67
103
rem (x:: T , :: Type{T} ) where {T <: Normed } = x
0 commit comments