@@ -10,8 +10,14 @@ import Base: ==, <, <=, -, +, *, /, ~, isapprox,
10
10
11
11
using Base: @pure
12
12
13
"""
    FixedPoint{T <: Integer, f} <: Real

Abstract supertype shared by the two fixed-point number types, `Fixed{T, f}`
and `Normed{T, f}`.

# Type parameters
- `T`: the integer type used as the underlying machine representation.
- `f`: the number of fraction bits.
"""
abstract type FixedPoint{T<:Integer,f} <: Real end
16
22
17
23
@@ -25,16 +31,20 @@ export
25
31
# Functions
26
32
scaledual
27
33
34
+ include (" utilities.jl" )
35
+
36
+ # reinterpretation
28
37
# Return the raw integer stored in the `i` field of `x`.
function reinterpret(x::FixedPoint)
    return x.i
end

# Same as the one-argument form, with the raw type `T` given explicitly; the
# method only applies when `T` is the first type parameter of `x`.
function reinterpret(::Type{T}, x::FixedPoint{T,f}) where {T,f}
    return x.i
end

# Build a value of the fixed-point type `X` from the integer `x` by calling
# `X(x, 0)` (presumably the raw-value constructor -- confirm in the concrete
# type definitions).
function reinterpret(::Type{X}, x::T) where {T<:Integer,X<:FixedPoint{T}}
    return X(x, 0)
end
40
+
41
+ # static parameters
42
# Return the second type parameter `f` of the fixed-point type `X`.
function nbitsfrac(::Type{X}) where {T,f,X<:FixedPoint{T,f}}
    return f
end

# Return the first type parameter `T` (the raw integer type) of `X`.
function rawtype(::Type{X}) where {T,X<:FixedPoint{T}}
    return T
end
30
44
31
45
# construction using the (approximate) intended value, i.e., N0f8
32
46
# Multiplying a real number by a fixed-point *type* converts the number to
# that type via `X(x)`.
function *(x::Real, ::Type{X}) where {X<:FixedPoint}
    return X(x)
end
33
47
34
- # comparison
35
- == (x:: T , y:: T ) where {T <: FixedPoint } = x. i == y. i
36
- < (x:: T , y:: T ) where {T <: FixedPoint } = x. i < y. i
37
- <= (x:: T , y:: T ) where {T <: FixedPoint } = x. i <= y. i
38
48
"""
39
49
isapprox(x::FixedPoint, y::FixedPoint; rtol=0, atol=max(eps(x), eps(y)))
40
50
52
62
# predicates
53
63
# Return `true` when none of the low `f` bits of the raw integer `x.i` is set.
function isinteger(x::FixedPoint{T,f}) where {T,f}
    # Build the mask in the raw type `T`. With an `Int` literal, `1 << f` is 0
    # once `f >= 64`, so the mask would degenerate to all ones for 128-bit raw
    # types. In `T`, the subtraction wraps, which still yields the low-`f`-bit
    # mask when `f` equals the width of `T`.
    fracmask = (one(T) << f) - one(T)
    return (x.i & fracmask) == 0
end
54
64
65
+ # identities
66
# Additive identity: the value whose raw integer is `zero(rawtype(X))`.
function zero(::Type{X}) where {X<:FixedPoint}
    return X(zero(rawtype(X)), 0)
end

# Unit value: the value whose raw integer is `rawone(X)`.
function oneunit(::Type{X}) where {X<:FixedPoint}
    return X(rawone(X), 0)
end

# Multiplicative identity; defined to be the same value as `oneunit(X)`.
function one(::Type{X}) where {X<:FixedPoint}
    return oneunit(X)
end
69
+
70
+ # for Julia v1.0, which does not fold `div_float` before inlining
71
# Optionally-generated function returning `1.0 / rawone(x)`.
function inv_rawone(x)
    if @generated
        # Generated branch: `x` is bound to the argument's type here. The
        # quotient is evaluated at generation time and spliced in as a literal.
        y = 1.0 / rawone(x)
        :($y)
    else
        # Fallback branch: compute the quotient at run time.
        1.0 / rawone(x)
    end
end
72
+
55
73
# traits
74
# Size in bytes is that of the raw integer type.
function sizeof(::Type{X}) where {X<:FixedPoint}
    return sizeof(rawtype(X))
end

# The value whose raw integer is `oneunit(rawtype(X))`.
function eps(::Type{X}) where {X<:FixedPoint}
    return X(oneunit(rawtype(X)), 0)
end

# Extremes are built from the extremes of the raw integer type.
function typemax(::Type{X}) where {X<:FixedPoint}
    return X(typemax(rawtype(X)), 0)
end

function typemin(::Type{X}) where {X<:FixedPoint}
    return X(typemin(rawtype(X)), 0)
end

# `floatmin` is defined as `eps`, and `floatmax` as `typemax`.
function floatmin(::Type{X}) where {X<:FixedPoint}
    return eps(X)
end

function floatmax(::Type{X}) where {X<:FixedPoint}
    return typemax(X)
end
60
80
61
- widen1 (:: Type{Int8} ) = Int16
62
- widen1 (:: Type{UInt8} ) = UInt16
63
- widen1 (:: Type{Int16} ) = Int32
64
- widen1 (:: Type{UInt16} ) = UInt32
65
- widen1 (:: Type{Int32} ) = Int64
66
- widen1 (:: Type{UInt32} ) = UInt64
67
- widen1 (:: Type{Int64} ) = Int128
68
- widen1 (:: Type{UInt64} ) = UInt128
69
- widen1 (:: Type{Int128} ) = Int128
70
- widen1 (:: Type{UInt128} ) = UInt128
71
- widen1 (x:: Integer ) = x % widen1 (typeof (x))
72
-
73
- const ShortInts = Union{Int8,UInt8,Int16,UInt16}
74
- const LongInts = Union{UInt64, UInt128, Int64, Int128, BigInt}
75
81
76
82
"""
77
83
floattype(::Type{T})
@@ -100,36 +106,54 @@ floattype(::Type{T}) where {T <: Real} = T # fallback
100
106
# Floating-point type chosen for each class of integer type: `Float32` for
# `ShortInts` and `Bool`, `BigFloat` for `LongInts`, `Float64` for any other
# `Integer`.
function floattype(::Type{T}) where {T<:Union{ShortInts,Bool}}
    return Float32
end

function floattype(::Type{T}) where {T<:Integer}
    return Float64
end

function floattype(::Type{T}) where {T<:LongInts}
    return BigFloat
end
103
- floattype (:: Type{FixedPoint{T,f}} ) where {T <: ShortInts ,f} = Float32
104
- floattype (:: Type{FixedPoint{T,f}} ) where {T <: Integer ,f} = Float64
105
- floattype (:: Type{FixedPoint{T,f}} ) where {T <: LongInts ,f} = BigFloat
106
- floattype (:: Type{F} ) where {F <: FixedPoint } = floattype (supertype (F))
107
- floattype (x:: FixedPoint ) = floattype (typeof (x))
109
# Floating-point type for a fixed-point type, selected by its raw integer type
# `T`: `Float32` for `ShortInts`, `BigFloat` for `LongInts`, `Float64` for any
# other `Integer`.
function floattype(::Type{X}) where {T<:ShortInts,X<:FixedPoint{T}}
    return Float32
end

function floattype(::Type{X}) where {T<:Integer,X<:FixedPoint{T}}
    return Float64
end

function floattype(::Type{X}) where {T<:LongInts,X<:FixedPoint{T}}
    return BigFloat
end
108
112
109
- nbitsfrac (:: Type{FixedPoint{T,f}} ) where {T <: Integer ,f} = f
110
- nbitsfrac (:: Type{F} ) where {F <: FixedPoint } = nbitsfrac (supertype (F))
113
# Convert `x` to the floating-point type reported by `floattype(x)`.
function float(x::FixedPoint)
    return convert(floattype(x), x)
end
111
114
112
- rawtype (:: Type{FixedPoint{T,f}} ) where {T <: Integer ,f} = T
113
- rawtype (:: Type{F} ) where {F <: FixedPoint } = rawtype (supertype (F))
114
- rawtype (x:: FixedPoint ) = rawtype (typeof (x))
115
# Return `(smaller, larger)` of two same-typed fixed-point values, ordering
# them by their raw integers and rewrapping each in `X`.
function minmax(x::X, y::X) where {X<:FixedPoint}
    lo, hi = minmax(reinterpret(x), reinterpret(y))
    return (X(lo, 0), X(hi, 0))
end
119
+
120
# Byte-swap the raw integer of `x`; one-byte types are returned unchanged.
function bswap(x::X) where {X<:FixedPoint}
    sizeof(X) == 1 && return x
    return X(bswap(x.i), 0)
end
115
121
116
- # This IOBuffer is used during module definition to generate typealias names
117
- _iotypealias = IOBuffer ()
122
# Value-level methods: each listed function, when called on a fixed-point
# value, forwards to its method on the value's type.
for fn in (:zero, :oneunit, :one, :eps, :rawone, :rawtype, :floattype)
    @eval $fn(x::FixedPoint) = $fn(typeof(x))
end
127
# Binary operations on two values of the same fixed-point type that return the
# result of the operation on the raw integers, without rewrapping it.
for op in (:(==), :<, :<=, :div, :fld, :fld1)
    @eval $op(x::X, y::X) where {X<:FixedPoint} = $op(x.i, y.i)
end
132
# Unary operations: apply the operation to the raw integer and rewrap the
# result in the same fixed-point type.
for op in (:-, :~, :abs)
    @eval $op(x::X) where {X<:FixedPoint} = X($op(x.i), 0)
end
137
# Binary operations on two values of the same fixed-point type: apply the
# operation to the raw integers and rewrap the result in that type.
for op in (:+, :-, :rem, :mod, :mod1, :min, :max)
    @eval $op(x::X, y::X) where {X<:FixedPoint} = X($op(x.i, y.i), 0)
end
118
142
119
143
# Printing. These are used to generate type-symbols, so we need them
120
144
# before we include any files.
121
145
# Print the short name of the fixed-point type `X` to `io` and return `io`.
# The name is `typechar(X)`, then the number of non-fraction, non-sign bits,
# then the letter `f`, then the number of fraction bits.
function showtype(io::IO, ::Type{X}) where {X<:FixedPoint}
    print(io, typechar(X))
    f = nbitsfrac(X)
    # Bits left over once the fraction bits and sign bits are accounted for.
    m = bitwidth(X) - f - signbits(X)
    # The separator must be the single character 'f'.
    print(io, m, 'f', f)
    return io
end
128
152
# Print `x` as a `Float64` rounded to `ceil(f * log10(2))` decimal digits.
# Unless `io` has `:compact => true`, the type name is appended via `showtype`.
function show(io::IO, x::FixedPoint{T,f}) where {T,f}
    places = ceil(Int, f * 0.3010299956639812)  # the constant is log10(2)
    show(io, round(convert(Float64, x), digits=places))
    return get(io, :compact, false) || showtype(io, typeof(x))
end
132
- const _log2_10 = 3.321928094887362
133
157
134
158
function Base. showarg (io:: IO , a:: Array{T} , toplevel) where {T<: FixedPoint }
135
159
toplevel || print (io, " ::" )
@@ -144,10 +168,6 @@ include("normed.jl")
144
168
include (" deprecations.jl" )
145
169
# Tuple of five fixed-point type aliases.
# NOTE(review): presumably the `Normed` aliases of at most 16 bits defined via
# normed.jl -- confirm where `UF` is consumed.
const UF = (N0f8, N6f10, N4f12, N2f14, N0f16)
146
170
147
- eps (:: Type{T} ) where {T <: FixedPoint } = T (oneunit (rawtype (T)),0 )
148
- eps (:: T ) where {T <: FixedPoint } = eps (T)
149
- sizeof (:: Type{T} ) where {T <: FixedPoint } = sizeof (rawtype (T))
150
-
151
171
# Promotions for reductions
152
172
# Element type that fixed-point values are converted to for reductions.
const Treduce = Float64

# Summation step: convert both operands to `Treduce` before adding.
function Base.add_sum(x::FixedPoint, y::FixedPoint)
    return Treduce(x) + Treduce(y)
end
@@ -158,17 +178,6 @@ Base.reduce_empty(::typeof(Base.mul_prod), ::Type{F}) where {F<:FixedPoint} = on
158
178
# First element of a `mul_prod` reduction: convert it to `Treduce`.
function Base.reduce_first(::typeof(Base.mul_prod), x::FixedPoint)
    return Treduce(x)
end
159
179
160
180
161
- for f in (:div , :fld , :fld1 )
162
- @eval begin
163
- $ f (x:: T , y:: T ) where {T <: FixedPoint } = $ f (reinterpret (x),reinterpret (y))
164
- end
165
- end
166
- for f in (:rem , :mod , :mod1 , :min , :max )
167
- @eval begin
168
- $ f (x:: T , y:: T ) where {T <: FixedPoint } = T ($ f (reinterpret (x),reinterpret (y)),0 )
169
- end
170
- end
171
-
172
181
"""
173
182
sd, ad = scaledual(s::Number, a)
174
183
@@ -185,13 +194,13 @@ scaledual(::Type{Tdual}, x::FixedPoint) where Tdual = convert(Tdual, 1/rawone(x)
185
194
# Array method: return the pair `(scale, raw)`, where `scale` is
# `1 / rawone(T)` converted to `Tdual` and `raw` is `x` reinterpreted as an
# array of the raw integer type of `T`.
function scaledual(::Type{Tdual}, x::AbstractArray{T}) where {Tdual,T<:FixedPoint}
    scale = convert(Tdual, 1 / rawone(T))
    raw = reinterpret(rawtype(T), x)
    return scale, raw
end
187
196
188
# Throw an `ArgumentError` explaining that `x` cannot be represented by the
# fixed-point type `X`. The message gives the bit width of `X`, the number of
# distinct bit patterns, and the compact printed forms of `typemin(X)` and
# `typemax(X)`.
@noinline function throw_converterror(::Type{X}, x) where {X<:FixedPoint}
    nbits = bitwidth(X)
    # Count in BigInt: `2^nbits` in `Int` wraps to 0 once nbits >= 64, which
    # would make the message claim the type represents 0 values.
    n = big(2)^nbits
    bitstring = nbits == 8 ? "an 8-bit" : "a $(nbits)-bit"
    io = IOBuffer()
    # Compact printing drops the type suffix, so only the number is captured.
    show(IOContext(io, :compact => true), typemin(X)); Xmin = String(take!(io))
    show(IOContext(io, :compact => true), typemax(X)); Xmax = String(take!(io))
    throw(ArgumentError("$X is $bitstring type representing $n values from $Xmin to $Xmax; cannot represent $x"))
end
196
205
197
206
# Draw a random raw integer of type `rawtype(T)` and reinterpret it as `T`.
function rand(::Type{T}) where {T<:FixedPoint}
    raw = rand(rawtype(T))
    return reinterpret(T, raw)
end
0 commit comments