Skip to content

Commit 08c7452

Browse files
committed
Allow signed Normed numbers
1 parent 4b9142b commit 08c7452

File tree

4 files changed

+127
-95
lines changed

4 files changed

+127
-95
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Manifest.toml

src/FixedPointNumbers.jl

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,25 @@ typemin(::Type{T}) where {T <: FixedPoint} = T(typemin(rawtype(T)), 0)
6060
# For fixed-point types `floatmin` is defined as `eps(T)`.
function floatmin(::Type{T}) where {T <: FixedPoint}
    return eps(T)
end

# For fixed-point types `floatmax` is defined as `typemax(T)`.
function floatmax(::Type{T}) where {T <: FixedPoint}
    return typemax(T)
end
6262

63-
widen1(::Type{Int8}) = Int16
64-
widen1(::Type{UInt8}) = UInt16
65-
widen1(::Type{Int16}) = Int32
66-
widen1(::Type{UInt16}) = UInt32
67-
widen1(::Type{Int32}) = Int64
68-
widen1(::Type{UInt32}) = UInt64
69-
widen1(::Type{Int64}) = Int128
70-
widen1(::Type{UInt64}) = UInt128
71-
widen1(::Type{Int128}) = Int128
63+
# widen1(T): the integer type one size step wider than `T`, keeping signedness.
# The 128-bit types have nothing wider here, so they map to themselves.
for (narrow, wide) in (
    (Int8, Int16), (UInt8, UInt16),
    (Int16, Int32), (UInt16, UInt32),
    (Int32, Int64), (UInt32, UInt64),
    (Int64, Int128), (UInt64, UInt128),
    (Int128, Int128), (UInt128, UInt128),
)
    @eval widen1(::Type{$narrow}) = $wide
end

# Value form: re-express `x` in the one-step-wider type of `typeof(x)`.
function widen1(x::Integer)
    return rem(x, widen1(typeof(x)))
end
7474

75+
# signedwiden1(U): the signed integer type one size step wider than the unsigned
# type `U`. UInt128 maps to Int128, which is the same width rather than wider.
for (unsigned_type, signed_wide) in (
    (UInt8, Int16),
    (UInt16, Int32),
    (UInt32, Int64),
    (UInt64, Int128),
    (UInt128, Int128),
)
    @eval signedwiden1(::Type{$unsigned_type}) = $signed_wide
end

# Value form: `x` reduced (via `rem`) into the type chosen above. Only types with
# a method above are supported; for UInt128 large values wrap into Int128.
function signedwiden1(x::Integer)
    return rem(x, signedwiden1(typeof(x)))
end
81+
7582
# The 8- and 16-bit integer types, signed and unsigned.
const ShortInts = Union{Int8,UInt8,Int16,UInt16}
7683
# The 64- and 128-bit integer types (signed and unsigned), plus BigInt.
const LongInts = Union{UInt64, UInt128, Int64, Int128, BigInt}
7784

src/normed.jl

Lines changed: 93 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,34 @@
1-
# Normed{T,f} maps UInts from 0 to 2^f-1 to the range [0.0, 1.0]
1+
# Normed{T,f} maps raw integers from 0 to 2^f-1 to the range [0.0, 1.0];
# when T is signed, raw values from -(2^f-1) to -1 likewise map to [-1.0, 0.0)
22
# For example, Normed{UInt8,8} == N0f8 maps 0x00 to 0.0 and 0xff to 1.0
33

4-
struct Normed{T<:Unsigned,f} <: FixedPoint{T,f}
4+
# Fixed-point number stored as a raw integer `i` of type `T` with `f` fractional
# bits. `T` may be any Integer; for signed `T` one bit is reserved for the sign.
struct Normed{T<:Integer,f} <: FixedPoint{T,f}
    i::T

    # 2-arg form for setting by raw representation: the second argument is ignored
    # and only selects this constructor. `i` is reduced into `T` with `%`.
    function Normed{T, f}(i::Integer, _) where {T,f}
        if !(f isa Int)
            error("f must be an Int")
        end
        # Bits available for the fraction: every bit of T except a sign bit.
        maxfrac = sizeof(T)*8 - (T<:Signed)
        if !(0 < f <= maxfrac)
            error("f must be between 1 and the number of non-sign bits")
        end
        return new{T, f}(i % T)
    end
end
913

1014
# Characters are rejected explicitly.
function Normed{T, f}(x::AbstractChar) where {T,f}
    throw(ArgumentError("Normed cannot be constructed from a Char"))
end

# A complex number is first converted to its real component type
# (`convert` decides whether that is possible), then constructed from that.
function Normed{T, f}(x::Complex) where {T,f}
    realtype = real(typeof(x))
    return Normed{T, f}(convert(realtype, x))
end

# TwicePrecision values go through Float64.
function Normed{T, f}(x::Base.TwicePrecision) where {T,f}
    return Normed{T, f}(convert(Float64, x))
end

# Same `f`, different raw type: carry the raw value over with `convert`.
function Normed{T1,f}(x::Normed{T2,f}) where {T1 <: Integer,T2 <: Integer,f}
    raw = convert(T1, x.i)
    return Normed{T1,f}(raw, 0)
end
19+
20+
# Type-name prefix character: 'N' for unsigned raw types, 'S' for signed ones.
typechar(::Type{X}) where {T <: Unsigned, X <: Normed{T}} = 'N'
typechar(::Type{X}) where {T <: Signed, X <: Normed{T}} = 'S'

# Number of sign bits in the raw representation: 0 for unsigned, 1 for signed.
signbits(::Type{X}) where {T <: Unsigned, X <: Normed{T}} = 0
signbits(::Type{X}) where {T <: Signed, X <: Normed{T}} = 1
24+
25+
# 64-bit integer type with the same signedness as the given integer type.
function I64(::Type{<:Unsigned})
    return UInt64
end
function I64(::Type{<:Signed})
    return Int64
end

# 32-bit integer type with the same signedness as the given integer type.
function I32(::Type{<:Unsigned})
    return UInt32
end
function I32(::Type{<:Signed})
    return Int32
end
1729

18-
for T in (UInt8, UInt16, UInt32, UInt64)
19-
for f in 1:sizeof(T)*8
30+
for T in (UInt8, UInt16, UInt32, UInt64, Int16, Int32, Int64)
31+
for f in 1:sizeof(T)*8-(T<:Signed)
2032
sym = Symbol(String(take!(showtype(_iotypealias, Normed{T,f}))))
2133
@eval begin
2234
const $sym = Normed{$T,$f}
@@ -25,77 +37,82 @@ for T in (UInt8, UInt16, UInt32, UInt64)
2537
end
2638
end
2739

28-
reinterpret(::Type{Normed{T,f}}, x::T) where {T <: Unsigned,f} = Normed{T,f}(x, 0)
40+
# Wrap a raw integer of exactly type `T` as a Normed{T,f} without scaling.
function reinterpret(::Type{Normed{T,f}}, x::T) where {T <: Integer,f}
    return Normed{T,f}(x, 0)
end

# The zero of a Normed type has raw value zero.
function zero(::Type{Normed{T,f}}) where {T <: Integer,f}
    return Normed{T,f}(zero(T), 0)
end

# The unit of a Normed type: `typemax` of the raw type shifted right so that only
# `nbitsfrac(N)` bits remain set. For signed `T` the sign bit is already clear in
# `typemax`, hence one fewer position to shift.
function oneunit(::Type{N}) where {T, f, N <: Normed{T,f}}
    shift = 8*sizeof(N) - nbitsfrac(N) - (T<:Signed)
    return N(typemax(rawtype(N)) >> shift, 0)
end
3446
# `one` and `oneunit` coincide for Normed types.
function one(::Type{T}) where {T <: Normed}
    return oneunit(T)
end

# Value-level forms forward to the type-level definitions.
function zero(x::Normed)
    return zero(typeof(x))
end
function oneunit(x::Normed)
    return one(typeof(x))
end
function one(x::Normed)
    return oneunit(x)
end

# Raw integer representation of `one(v)`.
function rawone(v)
    return reinterpret(one(v))
end
3951

40-
# Conversions
41-
function Normed{T,f}(x::Normed{T2}) where {T <: Unsigned,T2 <: Unsigned,f}
52+
# More construction and conversion
53+
# Convert between Normed types with different raw type and/or fractional bits:
# rescale the raw value by the ratio of the two units, round, and fail through
# `throw_converterror` if the result does not fit in `T`.
function Normed{T,f}(x::Normed{T2}) where {T <: Integer,T2 <: Integer,f}
    Target = Normed{T,f}
    scale = rawone(Target)/rawone(x)
    scaled = round(scale*reinterpret(x))
    fits = (typemin(T) <= scaled) & (scaled <= typemax(T))
    if !fits
        throw_converterror(Target, x)
    end
    return reinterpret(Target, _unsafe_trunc(T, scaled))
end
4759
# N0f8 -> N0f16: multiply the raw byte by 0x0101 to get the 16-bit raw value.
function N0f16(x::N0f8)
    widened = convert(UInt16, 0x0101*reinterpret(x))
    return reinterpret(N0f16, widened)
end

# Construction from any Real is delegated to `_convert`.
function (::Type{N})(x::Real) where {N <: Normed}
    return _convert(N, x)
end
5062

51-
function _convert(::Type{U}, x) where {T, f, U <: Normed{T,f}}
52-
if T == UInt128 # for UInt128, we can't widen
63+
# Generic conversion of `x` to the Normed type `N`: scale by the unit, round,
# range-check, and store the truncated result as the raw value. Out-of-range
# input is reported through `throw_converterror`.
function _convert(::Type{N}, x) where {T <: Integer, f, N <: Normed{T,f}}
    cannot_widen = T === UInt128 || T === Int128 # for [U]Int128, we can't widen
    if cannot_widen
        # the upper limit is not exact
        lo = typemin(T)/rawone(N)
        inrange = (lo <= x) & (x <= (typemax(T)/rawone(N)))
        inrange || throw_converterror(N, x)
        y = round(rawone(N)*x)
    else
        # Scale in the one-step-wider integer type, then check against T's range.
        y = round(widen1(rawone(N))*x)
        inrange = (typemin(T) <= y) & (y <= typemax(T))
        inrange || throw_converterror(N, x)
    end
    return reinterpret(N, _unsafe_trunc(T, y))
end
6274
# Prevent overflow (https://discourse.julialang.org/t/saving-greater-than-8-bit-images/6057)
63-
function _convert(::Type{U}, x::Float16) where {T, f, U <: Normed{T,f}}
64-
if Float16(typemax(T)/rawone(U)) > Float32(typemax(T)/rawone(U))
65-
x == Float16(typemax(T)/rawone(U)) && return typemax(U)
75+
# Float16 input is converted through Float32. When rounding the type's upper
# bound to Float16 yields something larger than its Float32 value, the Float16
# images of the bounds are mapped straight to `typemax(N)` / `typemin(N)`
# (the lower bound only for signed raw types).
function _convert(::Type{N}, x::Float16) where {T <: Integer, f, N <: Normed{T,f}}
    upper = typemax(T)/rawone(N)
    if Float16(upper) > Float32(upper)
        if x == Float16(upper)
            return typemax(N)
        end
        if T <: Signed && x == Float16(typemin(T)/rawone(N))
            return typemin(N)
        end
    end
    return _convert(N, Float32(x))
end
69-
function _convert(::Type{U}, x::Tf) where {T, f, U <: Normed{T,f}, Tf <: Union{Float32, Float64}}
70-
if T == UInt128 && f == 53
71-
0 <= x <= Tf(3.777893186295717e22) || throw_converterror(U, x)
84+
# Convert a Float32/Float64 `x` to the Normed type `N`.
#
# Steps: (1) range check, reporting failures through `throw_converterror`;
# (2) for small raw types, a direct `round(rawone(N) * x)`; (3) otherwise an
# integer-only path that works on the bit pattern of `x`.
#
# NOTE(review): for signed `T` the bit pattern is reinterpreted as a signed
# integer (`I64`/`I32`), so `>>` in step (3) is an arithmetic shift and the sign
# bit of a negative `x` ends up in the exponent field. It looks like step (3)
# only handles the unsigned case correctly - confirm before relying on it.
function _convert(::Type{N}, x::Tf) where {T <: Integer, f, N <: Normed{T,f}, Tf <: Union{Float32, Float64}}
    # (1) range check; the 128-bit, f == 53 cases use hard-coded bounds
    if T === UInt128 && f == 53
        Tf(0) <= x <= Tf(3.777893186295717e22) || throw_converterror(N, x)
    elseif T === Int128 && f == 53
        Tf(-1.888946593147859e22) <= x <= Tf(1.888946593147859e22) || throw_converterror(N, x)
    else
        Tf((typemin(T)+rawone(N))/rawone(N)-1) <= x <= Tf((typemax(T)-rawone(N))/rawone(N)+1) || throw_converterror(N, x)
    end

    # (2) direct path when both `f` and the raw type are small relative to the
    # float's significand
    sigbits = Tf === Float64 ? 52 : 23
    if f <= (sigbits + 1) && sizeof(T) * 8 < sigbits
        return reinterpret(N, unsafe_trunc(T, round(rawone(N) * x)))
    end

    # (3) cf. the implementation of `frexp`
    Tw = f < sizeof(T) * 8 ? T : widen1(T)
    bits = sizeof(Tw) * 8 - 1
    xbits = reinterpret(Tf === Float64 ? I64(T) : I32(T), x)
    biased = Int(xbits >> sigbits)
    if biased == 0
        return zero(N) # neglect subnormal numbers
    end
    mantissa = xbits | (one(xbits) << sigbits)
    hi = unsafe_trunc(Tw, mantissa) << (bits - sigbits)
    bias = Tf == Float64 ? 1023 : 127
    shift = bias - biased + bits - f
    acc = bits >= f ? hi - (hi >> f) : hi
    if shift <= 0
        shift == 0 && return reinterpret(N, unsafe_trunc(T, acc))
        # Only for shift == -1 with the top bit of `acc` set: return typemax.
        if shift == -1 && signbit(signed(acc))
            return typemax(N)
        end
        return reinterpret(N, unsafe_trunc(T, acc + acc))
    end
    if shift > bits
        return reinterpret(N, shift == bits + 1 ? one(T) : zero(T))
    end
    acc += one(Tw)<<((shift - 1) & bits) # RoundNearestTiesUp
    return reinterpret(N, unsafe_trunc(T, acc >> (shift & bits)))
end
100117

101118
# `x % T` when `x` already has type `T` is the identity.
function rem(x::T, ::Type{T}) where {T <: Normed}
    return x
end
@@ -125,14 +142,18 @@ function (::Type{T})(x::Normed) where {T <: AbstractFloat}
125142
convert(T, y) # needed for types like Float16 which promote arithmetic to Float32
126143
end
127144

128-
function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16, UInt32}, f}
145+
# A slightly faster copysign (one that avoids type-piracy).
# With an unsigned second argument there is no sign to apply, so `x` is returned
# unchanged. With an Int32, the sign bit of `i` is OR-ed into the bits of `x`:
# a negative `i` makes the result negative, while a non-negative `i` leaves `x`
# as it is (an existing sign bit on `x` is never cleared).
setsign(x::Float32, ::UInt32) = x
function setsign(x::Float32, i::Int32)
    signmask = reinterpret(UInt32, typemin(Int32))
    signbit_of_i = reinterpret(UInt32, i) & signmask
    return reinterpret(Float32, reinterpret(UInt32, x) | signbit_of_i)
end
148+
149+
# Float16 from a Normed with a raw type of at most 32 bits: with f == 1 the raw
# value is converted directly, otherwise the conversion goes through Float32.
function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16, UInt32, Int8, Int16, Int32}, f}
    if f == 1
        return Float16(x.i)
    end
    return Float16(Float32(x))
end

# Float16 from a Normed with a 64- or 128-bit raw type: as above, but going
# through Float64.
function Base.Float16(x::Normed{Ti,f}) where {Ti <: Union{UInt64, UInt128, Int64, Int128}, f}
    if f == 1
        return Float16(x.i)
    end
    return Float16(Float64(x))
end
134155

135-
function Base.Float32(x::Normed{UInt8,f}) where f
156+
function Base.Float32(x::Normed{<:Union{UInt8,Int8},f}) where f
136157
f == 1 && return Float32(x.i)
137158
f == 2 && return Float32(Int32(x.i) * 0x101) * @f32(0x550055p-32)
138159
f == 3 && return Float32(Int32(x.i) * 0x00b) * @f32(0xd4c77bp-30)
@@ -143,7 +164,7 @@ function Base.Float32(x::Normed{UInt8,f}) where f
143164
f == 8 && return Float32(Int32(x.i) * 0x155) * @f32(0xc0f0fdp-40)
144165
0.0f0
145166
end
146-
function Base.Float32(x::Normed{UInt16,f}) where f
167+
function Base.Float32(x::Normed{<:Union{UInt16,Int16},f}) where f
147168
f32 = Float32(x.i)
148169
f == 1 && return f32
149170
f == 2 && return f32 * @f32(0x55p-8) + f32 * @f32(0x555555p-32)
@@ -155,19 +176,19 @@ function Base.Float32(x::Normed{UInt16,f}) where f
155176
f == 16 && return f32 * @f32(0x01p-16) + f32 * @f32(0x010001p-48)
156177
Float32(x.i / rawone(x))
157178
end
158-
function Base.Float32(x::Normed{UInt32,f}) where f
179+
# Float32 from a Normed with a UInt32 raw value.
#
# f == 1 converts the raw value directly. For large `f` the 32 raw bits are split
# into two pieces that are each converted to Float32 and recombined with `muladd`;
# otherwise the value is computed in Float64 and rounded once at the end.
function Base.Float32(x::Normed{UInt32,f}) where f
    f == 1 && return Float32(x.i)
    # Work on the same 32 bits viewed as Int32; `>>>` keeps the shifts logical.
    i32 = unsafe_trunc(Int32, x.i)
    if f == 32
        rh, rl = Float32(i32>>>16), Float32((i32&0xFFFF)<<8 | (i32>>>24))
        return muladd(rh, @f32(0x1p-16), rl * @f32(0x1p-40))
    elseif f >= 25
        rh, rl = Float32(i32>>>16), Float32(((i32&0xFFFF)<<14) + (i32>>>(f-14)))
        return muladd(rh, Float32(@exp2(16-f)), rl * Float32(@exp2(-14-f)))
    end
    # FIXME: avoid the branch in native x86_64 (non-SIMD) codes
    # When the top bit is set, Float64(i32) is off by 2^32; `m` adds it back.
    m = ifelse(i32 < 0, 0x1p32 * inv_rawone(x), 0.0)
    return Float32(muladd(Float64(i32), inv_rawone(x), m))
end

# Float32 from a Normed with an Int32 raw value.
#
# The bit-splitting above treats the raw bits as an unsigned quantity, so it must
# not see a two's-complement negative pattern (the raw value -1 would be read as
# 2^32-1). The magnitude is therefore converted through the UInt32 method and the
# sign is applied afterwards with `setsign`.
function Base.Float32(x::Normed{Int32,f}) where f
    f == 1 && return Float32(x.i)
    # abs(typemin(Int32)) wraps to typemin, whose bits read as UInt32 are 2^31,
    # i.e. still the correct magnitude.
    magnitude = reinterpret(UInt32, abs(x.i))
    return setsign(Float32(Normed{UInt32,f}(magnitude, 0)), x.i)
end
172193
function Base.Float32(x::Normed{Ti,f}) where {Ti <: Union{UInt64, UInt128}, f}
173194
f == 1 ? Float32(x.i) : Float32(Float64(x))
@@ -176,7 +197,11 @@ end
176197
# Float64 from an 8- or 16-bit unsigned Normed: re-wrap the value as a
# Normed{UInt32,f} and convert that.
function Base.Float64(x::Normed{Ti,f}) where {Ti <: Union{UInt8, UInt16}, f}
    as32 = Normed{UInt32,f}(x)
    return Float64(as32)
end
179-
function Base.Float64(x::Normed{UInt32,f}) where f
200+
# Float64 from an 8- or 16-bit signed Normed: re-wrap the value as a
# Normed{Int32,f} and convert that.
function Base.Float64(x::Normed{Ti,f}) where {Ti <: Union{Int8, Int16}, f}
    as32 = Normed{Int32,f}(x)
    return Float64(as32)
end
203+
204+
function Base.Float64(x::Normed{<:Union{UInt32,Int32},f}) where f
180205
f64 = Float64(x.i)
181206
f == 1 && return f64
182207
f == 2 && return (f64 * 0x040001) * 0x15555000015555p-72
@@ -206,7 +231,7 @@ function Base.Float64(x::Normed{UInt32,f}) where f
206231
f == 32 && return (f64 * 0x010101) * 0x00ff0000ffff01p-96
207232
f64 / rawone(x)
208233
end
209-
function Base.Float64(x::Normed{UInt64,f}) where f
234+
function Base.Float64(x::Normed{UInt64,f}) where f # TODO: optimized version for Int64
210235
f == 1 && return Float64(x.i)
211236
if f >= 53
212237
rh = Float64(unsafe_trunc(Int64, x.i >> 16)) * @exp2(16-f) # upper 48 bits
@@ -215,7 +240,7 @@ function Base.Float64(x::Normed{UInt64,f}) where f
215240
end
216241
x.i / rawone(x)
217242
end
218-
function Base.Float64(x::Normed{UInt128,f}) where f
243+
function Base.Float64(x::Normed{UInt128,f}) where f # TODO: optimized version for Int128
219244
f == 1 && return Float64(x.i)
220245
ih, il = unsafe_trunc(Int64, x.i>>64), unsafe_trunc(Int64, x.i)
221246
rh = Float64(ih>>>16) * @exp2(f <= 53 ? 80 : 80 - f) # upper 48 bits
@@ -241,7 +266,8 @@ Base.Rational{Ti}(x::Normed) where {Ti <: Integer} = convert(Ti, reinterpret(x))
241266
# Exact rational value: raw representation over the raw unit.
function Base.Rational(x::Normed)
    numerator_raw = reinterpret(x)
    return numerator_raw//rawone(x)
end
242267

243268
# Traits
244-
abs(x::Normed) = x
269+
# Unsigned Normed values are returned as they are.
function abs(x::Normed{<:Unsigned})
    return x
end

# Otherwise take `abs` of the raw value and re-wrap it in the same type.
function abs(x::T) where T<:Normed
    return T(abs(x.i), 0)
end

# Unary minus and bitwise not act on the raw representation.
function (-)(x::T) where {T <: Normed}
    return T(-reinterpret(x), 0)
end
function (~)(x::T) where {T <: Normed}
    return T(~reinterpret(x), 0)
end
@@ -257,48 +283,38 @@ abs(x::Normed) = x
257283

258284
# Functions
259285
# Truncate to a whole number of units: integer-divide the raw value by the raw
# unit and scale back up.
function trunc(x::T) where {T <: Normed}
    unit = rawone(T)
    whole = div(reinterpret(x), unit)
    return T(whole*unit, 0)
end
260-
floor(x::T) where {T <: Normed} = trunc(x)
286+
# For unsigned raw types `floor` is the same as `trunc`.
function floor(x::Normed{T}) where {T <: Unsigned}
    return trunc(x)
end

# For signed raw types, `divrem` truncates toward zero, so a negative remainder
# means the quotient has to be lowered by one.
function floor(x::Normed{T}) where {T <: Signed}
    unit = rawone(x)
    q, rest = divrem(reinterpret(x), unit)
    q -= signbit(rest)
    return typeof(x)(q*unit, 0)
end
261291
# Round `x` to the nearest whole number of units, returning the same Normed type.
#
# The raw value is split into a quotient and remainder with respect to the raw
# unit (`divrem` truncates toward zero, so the remainder carries the sign of the
# raw value). The quotient moves one step away from zero when the remainder's
# magnitude exceeds half a unit.
#
# A plain `r >> (f-1)` is only correct for unsigned `T`: on a signed remainder it
# is an arithmetic shift, which yields -1 or -2 for every negative `r` and would
# round e.g. a tiny negative value down to -1. Comparing magnitudes gives the
# same result for unsigned `T` and the symmetric result for signed `T`.
function round(x::Normed{T,f}) where {T,f}
    unit = rawone(x)
    d, r = divrem(reinterpret(x), unit)
    # `unit >> 1` is half a unit rounded down; with the raw unit being 2^f - 1
    # (see `oneunit`), "more than half" is exactly `abs(r) > unit >> 1`.
    step = ifelse(abs(r) > (unit >> 1), sign(r), zero(r))
    return Normed{T,f}((d + step)*unit, 0)
end
267295
# Round up to a whole number of units: the truncated quotient is raised by one
# exactly when the remainder is positive.
function ceil(x::Normed{T,f}) where {T,f}
    unit = rawone(x)
    q, rest = divrem(reinterpret(x), unit)
    bump = rest > zero(rest)
    return Normed{T,f}((q + bump)*unit, 0)
end
274299

275300
# Integer-valued rounding to a requested Integer type `T`.

# Truncation is done exactly on the raw representation.
function trunc(::Type{T}, x::Normed) where {T <: Integer}
    whole = div(reinterpret(x), rawone(x))
    return convert(T, whole)
end

# Rounding to nearest goes through the floating-point value of `x`.
function round(::Type{T}, x::Normed) where {T <: Integer}
    return round(T, float(x))
end

# For unsigned raw types `floor` is the same as `trunc`.
function floor(::Type{T}, x::Normed{<:Unsigned}) where {T <: Integer}
    return trunc(T, x)
end

# For signed raw types `floor` goes through the floating-point value.
function floor(::Type{T}, x::Normed{<:Signed}) where {T <: Integer}
    return floor(T, float(x))
end

# `ceil` goes through the floating-point value of `x`.
function ceil(::Type{T}, x::Normed) where {T <: Integer}
    return ceil(T, float(x))
end
279305

280306
# Every Normed value is reported as finite, never NaN and never infinite.
function isfinite(x::Normed)
    return true
end
function isnan(x::Normed)
    return false
end
function isinf(x::Normed)
    return false
end
283309

284-
bswap(x::Normed{UInt8,f}) where {f} = x
285-
bswap(x::Normed) = typeof(x)(bswap(reinterpret(x)),0)
310+
# Byte swapping a value with a one-byte raw type is a no-op.
function bswap(x::Normed{<:Union{UInt8,Int8},f}) where {f}
    return x
end

# Otherwise swap the bytes of the raw representation and re-wrap it.
function bswap(x::Normed)
    swapped = bswap(reinterpret(x))
    return typeof(x)(swapped, 0)
end
286312

287313
# Order two values of the same Normed type by comparing their raw
# representations; returns the pair (smaller, larger).
function minmax(x::T, y::T) where {T <: Normed}
    lo, hi = minmax(reinterpret(x), reinterpret(y))
    return (T(lo, 0), T(hi, 0))
end
291317

292-
# Iteration
293-
# The main subtlety here is that iterating over N0f8(0):N0f8(1) will wrap around
294-
# unless we iterate using a wider type
295-
@inline start(r::StepRange{T}) where {T <: Normed} = widen1(reinterpret(r.start))
296-
@inline next(r::StepRange{T}, i::Integer) where {T <: Normed} = (T(i,0), i+reinterpret(r.step))
297-
@inline function done(r::StepRange{T}, i::Integer) where {T <: Normed}
298-
i1, i2 = reinterpret(r.start), reinterpret(r.stop)
299-
isempty(r) | (i < min(i1, i2)) | (i > max(i1, i2))
300-
end
301-
302318
function decompose(x::Normed)
303319
g = gcd(reinterpret(x), rawone(x))
304320
div(reinterpret(x),g), 0, div(rawone(x),g)

0 commit comments

Comments
 (0)