1
- # Normed{T,f} maps UInts from 0 to 2^f-1 to the range [0.0, 1.0]
1
+ # Normed{T,f} maps Integers from 0 to 2^f-1 to the range [0.0, 1.0]
2
2
# For example, Normed{UInt8,8} == N0f8 maps 0x00 to 0.0 and 0xff to 1.0
3
3
4
struct Normed{T<:Integer,f} <: FixedPoint{T,f}
    i::T  # raw (unscaled) integer representation

    # 2-arg form for setting by raw representation.
    # The second argument is unused. `i` is reduced into `T` with `%`, so a raw value
    # outside the range of `T` wraps instead of throwing.
    function Normed{T,f}(i::Integer, _) where {T,f}
        f isa Int || error("f must be an Int")
        # A signed raw type gives up one bit to the sign.
        maxbits = 8 * sizeof(T) - (T <: Signed)
        (0 < f) && (f <= maxbits) || error("f must be between 1 and the number of non-sign bits")
        return new{T,f}(i % T)
    end
end
9
13
10
14
# Characters are rejected outright.
function Normed{T,f}(x::AbstractChar) where {T,f}
    throw(ArgumentError("Normed cannot be constructed from a Char"))
end

# A complex input is first converted to its real component type; `convert` throws
# if the imaginary part cannot be dropped.
function Normed{T,f}(x::Complex) where {T,f}
    R = real(typeof(x))
    return Normed{T,f}(convert(R, x))
end

# TwicePrecision values go through Float64.
function Normed{T,f}(x::Base.TwicePrecision) where {T,f}
    return Normed{T,f}(convert(Float64, x))
end

# Same number of fractional bits, different raw type: convert the raw integer
# (checked by `convert`) and keep it as the raw representation.
function Normed{T1,f}(x::Normed{T2,f}) where {T1<:Integer,T2<:Integer,f}
    return Normed{T1,f}(convert(T1, x.i), 0)
end
19
+
20
# Character tag for the type: 'N' when the raw type is unsigned, 'S' when it is signed.
typechar(::Type{X}) where {T<:Unsigned,X<:Normed{T}} = 'N'
typechar(::Type{X}) where {T<:Signed,X<:Normed{T}} = 'S'
# Number of sign bits in the raw type: none for unsigned, one for signed.
signbits(::Type{X}) where {T<:Unsigned,X<:Normed{T}} = 0
signbits(::Type{X}) where {T<:Signed,X<:Normed{T}} = 1
24
+
25
# Select the 64-bit integer type whose signedness matches the given integer type.
I64(::Type{T}) where {T<:Unsigned} = UInt64
I64(::Type{T}) where {T<:Signed} = Int64
# Select the 32-bit integer type whose signedness matches the given integer type.
I32(::Type{T}) where {T<:Unsigned} = UInt32
I32(::Type{T}) where {T<:Signed} = Int32
17
29
18
- for T in (UInt8, UInt16, UInt32, UInt64)
19
- for f in 1 : sizeof (T)* 8
30
+ for T in (UInt8, UInt16, UInt32, UInt64, Int16, Int32, Int64 )
31
+ for f in 1 : sizeof (T)* 8 - (T <: Signed )
20
32
sym = Symbol (String (take! (showtype (_iotypealias, Normed{T,f}))))
21
33
@eval begin
22
34
const $ sym = Normed{$ T,$ f}
@@ -25,77 +37,82 @@ for T in (UInt8, UInt16, UInt32, UInt64)
25
37
end
26
38
end
27
39
28
# Wrap a raw integer whose type matches `T` as a `Normed{T,f}`; the value is used
# as the raw representation, not rescaled.
function reinterpret(::Type{Normed{T,f}}, x::T) where {T<:Integer,f}
    return Normed{T,f}(x, 0)
end
29
41
30
# Zero is the value whose raw representation is zero.
function zero(::Type{Normed{T,f}}) where {T<:Integer,f}
    return Normed{T,f}(zero(T), 0)
end

# The raw representation of one is `typemax` of the raw type shifted right by the
# number of bits that are neither fractional bits nor (for signed `T`) the sign bit.
function oneunit(::Type{N}) where {T,f,N<:Normed{T,f}}
    nshift = 8 * sizeof(N) - nbitsfrac(N) - (T <: Signed)
    return N(typemax(rawtype(N)) >> nshift, 0)
end

# `one` and `oneunit` coincide for Normed types.
function one(::Type{T}) where {T<:Normed}
    return oneunit(T)
end

# Value-argument forms defer to the type-argument forms.
function zero(x::Normed)
    return zero(typeof(x))
end
function oneunit(x::Normed)
    return one(typeof(x))
end
function one(x::Normed)
    return oneunit(x)
end

# Raw integer representation of one for a value or type `v`.
function rawone(v)
    return reinterpret(one(v))
end
39
51
40
# More construction and conversion

# Convert from a Normed type with a different scale: multiply the raw value by the
# ratio of the two raw "one" values, round, and range-check against the destination
# raw type before truncating.
function Normed{T,f}(x::Normed{T2}) where {T<:Integer,T2<:Integer,f}
    Dest = Normed{T,f}
    ratio = rawone(Dest) / rawone(x)
    y = round(ratio * reinterpret(x))
    fits = (typemin(T) <= y) & (y <= typemax(T))
    fits || throw_converterror(Dest, x)
    return reinterpret(Dest, _unsafe_trunc(T, y))
end
47
59
# Specialized N0f8 -> N0f16 conversion: multiplying the raw byte by 0x0101 replicates
# it into both bytes of the UInt16 (so 0xff becomes 0xffff).
N0f16(x::N0f8) = reinterpret(N0f16, convert(UInt16, 0x0101 * reinterpret(x)))

# Construction from any other real number is delegated to `_convert`.
function (::Type{N})(x::Real) where {N<:Normed}
    return _convert(N, x)
end
50
62
51
# Generic conversion of `x` to `N`: scale by the raw "one", round, range-check and
# truncate to the raw type. Throws via `throw_converterror` when out of range.
function _convert(::Type{N}, x) where {T<:Integer,f,N<:Normed{T,f}}
    if T === UInt128 || T === Int128 # for [U]Int128, we can't widen
        # the upper limit is not exact
        lo = typemin(T) / rawone(N)
        hi = typemax(T) / rawone(N)
        (lo <= x) & (x <= hi) || throw_converterror(N, x)
        y = round(rawone(N) * x)
    else
        # Scale in the widened type, then check the rounded result against `T`.
        y = round(widen1(rawone(N)) * x)
        (typemin(T) <= y) & (y <= typemax(T)) || throw_converterror(N, x)
    end
    return reinterpret(N, _unsafe_trunc(T, y))
end
62
74
# Prevent overflow (https://discourse.julialang.org/t/saving-greater-than-8-bit-images/6057)
# When rounding the largest representable value to Float16 pushes it above its Float32
# counterpart, a Float16 input equal to that bound is mapped straight to `typemax(N)`
# (and, for signed raw types, the lower bound to `typemin(N)`); everything else is
# converted through Float32.
function _convert(::Type{N}, x::Float16) where {T<:Integer,f,N<:Normed{T,f}}
    upper = typemax(T) / rawone(N)
    if Float16(upper) > Float32(upper)
        x == Float16(upper) && return typemax(N)
        if T <: Signed
            lower = typemin(T) / rawone(N)
            x == Float16(lower) && return typemin(N)
        end
    end
    return _convert(N, Float32(x))
end
69
# Conversion from Float32/Float64 that avoids a floating-point multiply for wide raw
# types by working on the bits of `x` directly. Throws via `throw_converterror` when
# `x` is outside the accepted range.
function _convert(::Type{N}, x::Tf) where {T<:Integer,f,N<:Normed{T,f},Tf<:Union{Float32,Float64}}
    # Range check; the 128-bit, f == 53 cases use hard-coded bounds.
    if T === UInt128 && f == 53
        Tf(0) <= x <= Tf(3.777893186295717e22) || throw_converterror(N, x)
    elseif T === Int128 && f == 53
        Tf(-1.888946593147859e22) <= x <= Tf(1.888946593147859e22) || throw_converterror(N, x)
    else
        lo = Tf((typemin(T) + rawone(N)) / rawone(N) - 1)
        hi = Tf((typemax(T) - rawone(N)) / rawone(N) + 1)
        lo <= x <= hi || throw_converterror(N, x)
    end

    is64 = Tf === Float64
    significand_bits = is64 ? 52 : 23
    # Small raw types: the scaled product is computed directly in floating point.
    if f <= (significand_bits + 1) && sizeof(T) * 8 < significand_bits
        return reinterpret(N, unsafe_trunc(T, round(rawone(N) * x)))
    end
    # cf. the implementation of `frexp`
    Tw = f < sizeof(T) * 8 ? T : widen1(T)
    bits = sizeof(Tw) * 8 - 1
    # NOTE(review): for signed `T` this reinterprets `x` as a signed integer, so for a
    # negative `x` the arithmetic shift below folds the sign bit into `k` -- the
    # bit-level path looks valid only for non-negative inputs; confirm.
    xu = reinterpret(is64 ? I64(T) : I32(T), x)
    k = Int(xu >> significand_bits)
    k == 0 && return zero(N) # neglect subnormal numbers
    significand = xu | (one(xu) << significand_bits)
    yh = unsafe_trunc(Tw, significand) << (bits - significand_bits)
    exponent_bias = is64 ? 1023 : 127
    ex = exponent_bias - k + bits - f
    yi = bits >= f ? yh - (yh >> f) : yh
    if ex <= 0
        ex == 0 && return reinterpret(N, unsafe_trunc(T, yi))
        if ex == -1 && signbit(signed(yi))
            return typemax(N)
        end
        return reinterpret(N, unsafe_trunc(T, yi + yi))
    end
    ex > bits && return reinterpret(N, ex == bits + 1 ? one(T) : zero(T))
    yi += one(Tw) << ((ex - 1) & bits) # RoundNearestTiesUp
    return reinterpret(N, unsafe_trunc(T, yi >> (ex & bits)))
end
100
117
101
118
# Taking the remainder with respect to the value's own type is the identity.
function rem(x::T, ::Type{T}) where {T<:Normed}
    return x
end
@@ -125,14 +142,18 @@ function (::Type{T})(x::Normed) where {T <: AbstractFloat}
125
142
convert (T, y) # needed for types like Float16 which promote arithmetic to Float32
126
143
end
127
144
128
- function Base. Float16 (x:: Normed{Ti,f} ) where {Ti <: Union{UInt8, UInt16, UInt32} , f}
145
# A slightly faster copysign (one that avoids type-piracy)
# An unsigned integer carries no sign, so `x` is returned unchanged.
setsign(x::Float32, i::UInt32) = x
# For a signed integer, OR the sign bit of `i` into the bits of `x`. This can set the
# sign bit of `x` but never clears it.
function setsign(x::Float32, i::Int32)
    signmask = 0x80000000  # bit pattern of typemin(Int32)
    merged = reinterpret(UInt32, x) | (reinterpret(UInt32, i) & signmask)
    return reinterpret(Float32, merged)
end
148
+
149
# Float16 conversion for raw types of at most 32 bits: with f == 1 the raw "one" is 1,
# so the raw integer is the value; otherwise go through Float32.
function Base.Float16(x::Normed{Ti,f}) where {Ti<:Union{UInt8,UInt16,UInt32,Int8,Int16,Int32},f}
    f == 1 && return Float16(x.i)
    return Float16(Float32(x))
end
# Float16 conversion for 64- and 128-bit raw types: same as above but through Float64.
function Base.Float16(x::Normed{Ti,f}) where {Ti<:Union{UInt64,UInt128,Int64,Int128},f}
    f == 1 && return Float16(x.i)
    return Float16(Float64(x))
end
134
155
135
- function Base. Float32 (x:: Normed{UInt8,f} ) where f
156
+ function Base. Float32 (x:: Normed{<:Union{ UInt8,Int8} ,f} ) where f
136
157
f == 1 && return Float32 (x. i)
137
158
f == 2 && return Float32 (Int32 (x. i) * 0x101 ) * @f32 (0x550055 p- 32 )
138
159
f == 3 && return Float32 (Int32 (x. i) * 0x00b ) * @f32 (0xd4c77b p- 30 )
@@ -143,7 +164,7 @@ function Base.Float32(x::Normed{UInt8,f}) where f
143
164
f == 8 && return Float32 (Int32 (x. i) * 0x155 ) * @f32 (0xc0f0fd p- 40 )
144
165
0.0f0
145
166
end
146
- function Base. Float32 (x:: Normed{UInt16,f} ) where f
167
+ function Base. Float32 (x:: Normed{<:Union{ UInt16,Int16} ,f} ) where f
147
168
f32 = Float32 (x. i)
148
169
f == 1 && return f32
149
170
f == 2 && return f32 * @f32 (0x55 p- 8 ) + f32 * @f32 (0x555555 p- 32 )
@@ -155,19 +176,19 @@ function Base.Float32(x::Normed{UInt16,f}) where f
155
176
f == 16 && return f32 * @f32 (0x01 p- 16 ) + f32 * @f32 (0x010001 p- 48 )
156
177
Float32 (x. i / rawone (x))
157
178
end
158
# Float32 conversion for 32-bit raw types.
#
# The arithmetic below treats `i32` as an unsigned 32-bit pattern (note the logical
# shifts and the 2^32 correction for a set top bit), and `setsign` afterwards only ORs
# the sign of `x.i` into the result. The pattern must therefore hold the *magnitude*
# of the raw value; using the raw two's-complement bits of a negative Int32 would
# produce roughly (2^32 - |i|)/rawone instead of |i|/rawone.
function Base.Float32(x::Normed{T,f}) where {T<:Union{UInt32,Int32},f}
    # With f == 1 the raw "one" is 1, so the raw integer is the value.
    f == 1 && return Float32(x.i)
    # `abs` is the identity for UInt32. For Int32, abs(typemin) wraps to typemin, whose
    # bit pattern 0x80000000 is exactly the magnitude 2^31 when read as unsigned.
    i32 = unsafe_trunc(Int32, abs(x.i))
    if f == 32
        rh, rl = Float32(i32>>>16), Float32((i32&0xFFFF)<<8 | (i32>>>24))
        return setsign(muladd(rh, @f32(0x1p-16), rl * @f32(0x1p-40)), x.i)
    elseif f >= 25
        rh, rl = Float32(i32>>>16), Float32(((i32&0xFFFF)<<14) + (i32>>>(f-14)))
        return setsign(muladd(rh, Float32(@exp2(16-f)), rl * Float32(@exp2(-14-f))), x.i)
    end
    # FIXME: avoid the branch in native x86_64 (non-SIMD) codes
    # A set top bit makes Float64(i32) negative; adding 2^32 * inv_rawone(x) restores
    # the unsigned interpretation of the pattern.
    m = ifelse(i32 < 0, 0x1p32 * inv_rawone(x), 0.0)
    return setsign(Float32(muladd(Float64(i32), inv_rawone(x), m)), x.i)
end
172
193
function Base. Float32 (x:: Normed{Ti,f} ) where {Ti <: Union{UInt64, UInt128} , f}
173
194
f == 1 ? Float32 (x. i) : Float32 (Float64 (x))
176
197
# Float64 conversion for 8- and 16-bit unsigned raw types: widen the raw type to
# UInt32 (same `f`) and reuse that conversion.
function Base.Float64(x::Normed{Ti,f}) where {Ti<:Union{UInt8,UInt16},f}
    widened = Normed{UInt32,f}(x)
    return Float64(widened)
end
179
- function Base. Float64 (x:: Normed{UInt32,f} ) where f
200
# Float64 conversion for 8- and 16-bit signed raw types: widen the raw type to
# Int32 (same `f`) and reuse that conversion.
function Base.Float64(x::Normed{Ti,f}) where {Ti<:Union{Int8,Int16},f}
    widened = Normed{Int32,f}(x)
    return Float64(widened)
end
203
+
204
+ function Base. Float64 (x:: Normed{<:Union{UInt32,Int32},f} ) where f
180
205
f64 = Float64 (x. i)
181
206
f == 1 && return f64
182
207
f == 2 && return (f64 * 0x040001 ) * 0x15555000015555 p- 72
@@ -206,7 +231,7 @@ function Base.Float64(x::Normed{UInt32,f}) where f
206
231
f == 32 && return (f64 * 0x010101 ) * 0x00ff0000ffff01 p- 96
207
232
f64 / rawone (x)
208
233
end
209
- function Base. Float64 (x:: Normed{UInt64,f} ) where f
234
+ function Base. Float64 (x:: Normed{UInt64,f} ) where f # TODO : optimized version for Int64
210
235
f == 1 && return Float64 (x. i)
211
236
if f >= 53
212
237
rh = Float64 (unsafe_trunc (Int64, x. i >> 16 )) * @exp2 (16 - f) # upper 48 bits
@@ -215,7 +240,7 @@ function Base.Float64(x::Normed{UInt64,f}) where f
215
240
end
216
241
x. i / rawone (x)
217
242
end
218
- function Base. Float64 (x:: Normed{UInt128,f} ) where f
243
+ function Base. Float64 (x:: Normed{UInt128,f} ) where f # TODO : optimized version for Int128
219
244
f == 1 && return Float64 (x. i)
220
245
ih, il = unsafe_trunc (Int64, x. i>> 64 ), unsafe_trunc (Int64, x. i)
221
246
rh = Float64 (ih>>> 16 ) * @exp2 (f <= 53 ? 80 : 80 - f) # upper 48 bits
@@ -241,7 +266,8 @@ Base.Rational{Ti}(x::Normed) where {Ti <: Integer} = convert(Ti, reinterpret(x))
241
266
# Exact rational value: raw integer over the raw representation of one.
function Base.Rational(x::Normed)
    return reinterpret(x) // rawone(x)
end
242
267
243
268
# Traits
# Unsigned-backed values are already non-negative.
abs(x::Normed{<:Unsigned}) = x
# Otherwise take the absolute value of the raw integer (which wraps for `typemin`,
# as integer `abs` does).
function abs(x::T) where {T<:Normed}
    return T(abs(x.i), 0)
end

# Negation and bitwise complement act on the raw integer.
function (-)(x::T) where {T<:Normed}
    return T(-reinterpret(x), 0)
end
function (~)(x::T) where {T<:Normed}
    return T(~reinterpret(x), 0)
end
@@ -257,48 +283,38 @@ abs(x::Normed) = x
257
283
258
284
# Functions
# Drop the fractional part: integer-divide the raw value by the raw "one", then
# scale back.
function trunc(x::T) where {T<:Normed}
    whole = div(reinterpret(x), rawone(T))
    return T(whole * rawone(T), 0)
end
# For unsigned raw types, truncation and floor coincide.
floor(x::Normed{T}) where {T<:Unsigned} = trunc(x)
function floor(x::Normed{T}) where {T<:Signed}
    q, rm = divrem(reinterpret(x), rawone(x))
    # `divrem` truncates toward zero, so step down by one when the remainder is negative.
    return typeof(x)((q - signbit(rm)) * rawone(x), 0)
end
261
291
# Round to the nearest integer value.
#
# The raw value is split into quotient `d` and remainder `r` with respect to the raw
# "one" (2^f - 1 for the constructors in this file). Because that divisor is odd, an
# exact tie cannot occur: the fractional part exceeds one half exactly when
# |r| >= 2^(f-1), i.e. when `abs(r) >> (f-1)` is 1.
#
# The shift is applied to the magnitude and the sign restored afterwards. Shifting a
# negative remainder directly is an arithmetic shift and yields -1 or -2 rather than
# 0 or -1, which would round every negative non-integer one unit too far from zero.
# For unsigned raw types the result is unchanged.
function round(x::Normed{T,f}) where {T,f}
    d, r = divrem(reinterpret(x), rawone(x))
    step = sign(r) * (abs(r) >> (f - 1))
    return Normed{T,f}((d + step) * rawone(x), 0)
end
267
295
# Round up to the next integer value: bump the truncated quotient by one whenever the
# remainder is strictly positive.
function ceil(x::Normed{T,f}) where {T,f}
    q, rm = divrem(reinterpret(x), rawone(x))
    bump = rm > zero(rm)
    return Normed{T,f}((q + bump) * rawone(x), 0)
end
274
299
275
300
# Rounding to an integer type `T`.
# Truncation is done exactly on the raw integers.
function trunc(::Type{T}, x::Normed) where {T<:Integer}
    return convert(T, div(reinterpret(x), rawone(x)))
end
# Rounding and ceiling go through the floating-point value.
function round(::Type{T}, x::Normed) where {T<:Integer}
    return round(T, float(x))
end
# Unsigned-backed values are non-negative, so floor is truncation.
function floor(::Type{T}, x::Normed{<:Unsigned}) where {T<:Integer}
    return trunc(T, x)
end
function floor(::Type{T}, x::Normed{<:Signed}) where {T<:Integer}
    return floor(T, float(x))
end
function ceil(::Type{T}, x::Normed) where {T<:Integer}
    return ceil(T, float(x))
end
279
305
280
306
# Every Normed value is an ordinary finite number: never NaN, never infinite.
function isfinite(x::Normed)
    return true
end
function isnan(x::Normed)
    return false
end
function isinf(x::Normed)
    return false
end
283
309
284
# Byte swapping is a no-op for single-byte raw types.
bswap(x::Normed{<:Union{UInt8,Int8},f}) where {f} = x
# Otherwise swap the bytes of the raw integer and rewrap it in the same type.
function bswap(x::Normed)
    swapped = bswap(reinterpret(x))
    return typeof(x)(swapped, 0)
end
286
312
287
313
# Order two values of the same Normed type by comparing their raw integers, and
# return them as a (smaller, larger) tuple.
function minmax(x::T, y::T) where {T<:Normed}
    lo, hi = minmax(reinterpret(x), reinterpret(y))
    return T(lo, 0), T(hi, 0)
end
291
317
292
- # Iteration
293
- # The main subtlety here is that iterating over N0f8(0):N0f8(1) will wrap around
294
- # unless we iterate using a wider type
295
- @inline start (r:: StepRange{T} ) where {T <: Normed } = widen1 (reinterpret (r. start))
296
- @inline next (r:: StepRange{T} , i:: Integer ) where {T <: Normed } = (T (i,0 ), i+ reinterpret (r. step))
297
- @inline function done (r:: StepRange{T} , i:: Integer ) where {T <: Normed }
298
- i1, i2 = reinterpret (r. start), reinterpret (r. stop)
299
- isempty (r) | (i < min (i1, i2)) | (i > max (i1, i2))
300
- end
301
-
302
318
function decompose (x:: Normed )
303
319
g = gcd (reinterpret (x), rawone (x))
304
320
div (reinterpret (x),g), 0 , div (rawone (x),g)
0 commit comments