diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl index bcfc56e7..f956e058 100644 --- a/src/FixedPointNumbers.jl +++ b/src/FixedPointNumbers.jl @@ -11,9 +11,6 @@ import Base: ==, <, <=, -, +, *, /, ~, isapprox, import Random: Random, AbstractRNG, SamplerType, rand! -import Base.Checked: checked_neg, checked_abs, checked_add, checked_sub, checked_mul, - checked_div, checked_fld, checked_cld, checked_rem, checked_mod - using Base: @pure """ @@ -35,14 +32,11 @@ export # "special" typealiases # Q and N typealiases are exported in separate source files # Functions - scaledual, - wrapping_neg, wrapping_abs, wrapping_add, wrapping_sub, wrapping_mul, - wrapping_div, wrapping_fld, wrapping_cld, wrapping_rem, wrapping_mod, - saturating_neg, saturating_abs, saturating_add, saturating_sub, saturating_mul, - saturating_div, saturating_fld, saturating_cld, saturating_rem, saturating_mod, - wrapping_fdiv, saturating_fdiv, checked_fdiv + scaledual include("utilities.jl") +using .Utilities +import .Utilities: floattype, rawone, nbitsfrac, rawtype, signbits, nbitsint, scaledual # reinterpretation reinterpret(x::FixedPoint) = x.i @@ -57,18 +51,6 @@ rawtype(::Type{X}) where {T, X <: FixedPoint{T}} = T signbits(::Type{X}) where {T, X <: FixedPoint{T}} = T <: Unsigned ? 0 : 1 nbitsint(::Type{X}) where {X <: FixedPoint} = bitwidth(X) - nbitsfrac(X) - signbits(X) -# construction using the (approximate) intended value, i.e., N0f8 -*(x::Real, ::Type{X}) where {X <: FixedPoint} = _convert(X, x) -wrapping_mul(x::Real, ::Type{X}) where {X <: FixedPoint} = x % X -saturating_mul(x::Real, ::Type{X}) where {X <: FixedPoint} = clamp(x, X) -checked_mul(x::Real, ::Type{X}) where {X <: FixedPoint} = _convert(X, x) - -# type modulus -rem(x::Real, ::Type{X}) where {X <: FixedPoint} = _rem(x, X) -wrapping_rem(x::Real, ::Type{X}) where {X <: FixedPoint} = _rem(x, X) -saturating_rem(x::Real, ::Type{X}) where {X <: FixedPoint} = _rem(x, X) -checked_rem(x::Real, ::Type{X}) where {X <: FixedPoint} = _rem(x, X) - # constructor-style conversions (::Type{X})(x::X) where {X <: FixedPoint} = x (::Type{X})(x::Number) where {X <: FixedPoint} = _convert(X, x) @@ -139,9 +121,6 @@ zero(::Type{X}) where {X <: FixedPoint} = X(zero(rawtype(X)), 0) oneunit(::Type{X}) where {X <: FixedPoint} = X(rawone(X), 0) one(::Type{X}) where {X <: FixedPoint} = oneunit(X) -# for Julia v1.0, which does not fold `div_float` before inlining -inv_rawone(x) = (@generated) ? (y = 1.0 / rawone(x); :($y)) : 1.0 / rawone(x) - # traits eps(::Type{X}) where {X <: FixedPoint} = X(oneunit(rawtype(X)), 0) typemax(::Type{T}) where {T <: FixedPoint} = T(typemax(rawtype(T)), 0) @@ -192,164 +171,12 @@ RGB{Float32} `RGB` itself is not a subtype of `AbstractFloat`, but unlike `RGB{N0f8}` operations with `RGB{Float32}` are not subject to integer overflow. """ -floattype(::Type{T}) where {T <: AbstractFloat} = T # fallback (we want a MethodError if no method producing AbstractFloat is defined) -floattype(::Type{T}) where {T <: Union{ShortInts, Bool}} = Float32 -floattype(::Type{T}) where {T <: Integer} = Float64 -floattype(::Type{T}) where {T <: LongInts} = BigFloat -floattype(::Type{T}) where {I <: Integer, T <: Rational{I}} = typeof(zero(I)/oneunit(I)) -floattype(::Type{<:AbstractIrrational}) = Float64 floattype(::Type{X}) where {T <: ShortInts, X <: FixedPoint{T}} = Float32 floattype(::Type{X}) where {T <: Integer, X <: FixedPoint{T}} = Float64 floattype(::Type{X}) where {T <: LongInts, X <: FixedPoint{T}} = BigFloat -# Non-Real types -floattype(::Type{Complex{T}}) where T = Complex{floattype(T)} -floattype(::Type{Base.TwicePrecision{Float64}}) = Float64 # wider would be nice, but hardware support is paramount -floattype(::Type{Base.TwicePrecision{T}}) where T<:Union{Float16,Float32} = widen(T) - float(x::FixedPoint) = convert(floattype(x), x) -# wrapping arithmetic -wrapping_neg(x::X) where {X <: FixedPoint} = X(-x.i, 0) -wrapping_abs(x::X) where {X <: FixedPoint} = X(abs(x.i), 0) -wrapping_add(x::X, y::X) where {X <: FixedPoint} = X(x.i + y.i, 0) -wrapping_sub(x::X, y::X) where {X <: FixedPoint} = X(x.i - y.i, 0) -wrapping_mul(x::X, y::X) where {X <: FixedPoint} = (float(x) * float(y)) % X -function wrapping_fdiv(x::X, y::X) where {X <: FixedPoint} - z = floattype(X)(x.i) / floattype(X)(y.i) - isfinite(z) ? z % X : zero(X) -end -function wrapping_div(x::X, y::X, r::RoundingMode = RoundToZero) where {T, X <: FixedPoint{T}} - z = round(floattype(X)(x.i) / floattype(X)(y.i), r) - isfinite(z) || return zero(T) - if T <: Unsigned - _unsafe_trunc(T, z) - else - z > typemax(T) ? typemin(T) : _unsafe_trunc(T, z) - end -end -wrapping_fld(x::X, y::X) where {X <: FixedPoint} = wrapping_div(x, y, RoundDown) -wrapping_cld(x::X, y::X) where {X <: FixedPoint} = wrapping_div(x, y, RoundUp) -wrapping_rem(x::X, y::X, r::RoundingMode = RoundToZero) where {T, X <: FixedPoint{T}} = - X(x.i - wrapping_div(x, y, r) * y.i, 0) -wrapping_mod(x::X, y::X) where {X <: FixedPoint} = wrapping_rem(x, y, RoundDown) - -# saturating arithmetic -saturating_neg(x::X) where {X <: FixedPoint} = X(~min(x.i - true, x.i), 0) -saturating_neg(x::X) where {X <: FixedPoint{<:Unsigned}} = zero(X) - -saturating_abs(x::X) where {X <: FixedPoint} = - X(ifelse(signbit(abs(x.i)), typemax(x.i), abs(x.i)), 0) - -saturating_add(x::X, y::X) where {X <: FixedPoint} = - X(x.i + ifelse(x.i < 0, max(y.i, typemin(x.i) - x.i), min(y.i, typemax(x.i) - x.i)), 0) -saturating_add(x::X, y::X) where {X <: FixedPoint{<:Unsigned}} = X(x.i + min(~x.i, y.i), 0) - -saturating_sub(x::X, y::X) where {X <: FixedPoint} = - X(x.i - ifelse(x.i < 0, min(y.i, x.i - typemin(x.i)), max(y.i, x.i - typemax(x.i))), 0) -saturating_sub(x::X, y::X) where {X <: FixedPoint{<:Unsigned}} = X(x.i - min(x.i, y.i), 0) - -saturating_mul(x::X, y::X) where {X <: FixedPoint} = clamp(float(x) * float(y), X) - -saturating_fdiv(x::X, y::X) where {X <: FixedPoint} = - clamp(floattype(X)(x.i) / floattype(X)(y.i), X) - -function saturating_div(x::X, y::X, r::RoundingMode = RoundToZero) where {T, X <: FixedPoint{T}} - z = round(floattype(X)(x.i) / floattype(X)(y.i), r) - isnan(z) && return zero(T) - if T <: Unsigned - isfinite(z) ? _unsafe_trunc(T, z) : typemax(T) - else - _unsafe_trunc(T, clamp(z, typemin(T), typemax(T))) - end -end -saturating_fld(x::X, y::X) where {X <: FixedPoint} = saturating_div(x, y, RoundDown) -saturating_cld(x::X, y::X) where {X <: FixedPoint} = saturating_div(x, y, RoundUp) -function saturating_rem(x::X, y::X, r::RoundingMode = RoundToZero) where {T, X <: FixedPoint{T}} - T <: Unsigned && r isa RoundingMode{:Up} && return zero(X) - X(x.i - saturating_div(x, y, r) * y.i, 0) -end -saturating_mod(x::X, y::X) where {X <: FixedPoint} = saturating_rem(x, y, RoundDown) - -# checked arithmetic -checked_neg(x::X) where {X <: FixedPoint} = checked_sub(zero(X), x) -function checked_abs(x::X) where {X <: FixedPoint} - abs(x.i) >= 0 || throw_overflowerror_abs(x) - X(abs(x.i), 0) -end -function checked_add(x::X, y::X) where {X <: FixedPoint} - r, f = Base.Checked.add_with_overflow(x.i, y.i) - z = X(r, 0) # store first - f && throw_overflowerror(:+, x, y) - z -end -function checked_sub(x::X, y::X) where {X <: FixedPoint} - r, f = Base.Checked.sub_with_overflow(x.i, y.i) - z = X(r, 0) # store first - f && throw_overflowerror(:-, x, y) - z -end -function checked_mul(x::X, y::X) where {X <: FixedPoint} - z = float(x) * float(y) - typemin(X) - eps(X)/2 <= z < typemax(X) + eps(X)/2 || throw_overflowerror(:*, x, y) - z % X -end -function checked_fdiv(x::X, y::X) where {T, X <: FixedPoint{T}} - y === zero(X) && throw(DivideError()) - z = floattype(X)(x.i) / floattype(X)(y.i) - if T <: Unsigned - z < typemax(X) + eps(X)/2 || throw_overflowerror(:/, x, y) - else - typemin(X) - eps(X)/2 <= z < typemax(X) + eps(X)/2 || throw_overflowerror(:/, x, y) - end - z % X -end -function checked_div(x::X, y::X, r::RoundingMode = RoundToZero) where {T, X <: FixedPoint{T}} - y === zero(X) && throw(DivideError()) - z = round(floattype(X)(x.i) / floattype(X)(y.i), r) - if T <: Signed - z <= typemax(T) || throw_overflowerror_div(r, x, y) - end - _unsafe_trunc(T, z) -end -checked_fld(x::X, y::X) where {X <: FixedPoint} = checked_div(x, y, RoundDown) -checked_cld(x::X, y::X) where {X <: FixedPoint} = checked_div(x, y, RoundUp) -function checked_rem(x::X, y::X, r::RoundingMode = RoundToZero) where {T, X <: FixedPoint{T}} - y === zero(X) && throw(DivideError()) - fx, fy = floattype(X)(x.i), floattype(X)(y.i) - z = fx - round(fx / fy, r) * fy - if T <: Unsigned && r isa RoundingMode{:Up} - z >= zero(z) || throw_overflowerror_rem(r, x, y) - end - X(_unsafe_trunc(T, z), 0) -end -checked_mod(x::X, y::X) where {X <: FixedPoint} = checked_rem(x, y, RoundDown) - -# default arithmetic -const DEFAULT_ARITHMETIC = :wrapping - -for (op, name) in ((:-, :neg), (:abs, :abs)) - f = Symbol(DEFAULT_ARITHMETIC, :_, name) - @eval begin - $op(x::X) where {X <: FixedPoint} = $f(x) - end -end -for (op, name) in ((:+, :add), (:-, :sub), (:*, :mul)) - f = Symbol(DEFAULT_ARITHMETIC, :_, name) - @eval begin - $op(x::X, y::X) where {X <: FixedPoint} = $f(x, y) - end -end -# force checked arithmetic -/(x::X, y::X) where {X <: FixedPoint} = checked_fdiv(x, y) -div(x::X, y::X, r::RoundingMode = RoundToZero) where {X <: FixedPoint} = checked_div(x, y, r) -fld(x::X, y::X) where {X <: FixedPoint} = checked_div(x, y, RoundDown) -cld(x::X, y::X) where {X <: FixedPoint} = checked_div(x, y, RoundUp) -rem(x::X, y::X) where {X <: FixedPoint} = checked_rem(x, y, RoundToZero) -rem(x::X, y::X, ::RoundingMode{:Down}) where {X <: FixedPoint} = checked_rem(x, y, RoundDown) -rem(x::X, y::X, ::RoundingMode{:Up}) where {X <: FixedPoint} = checked_rem(x, y, RoundUp) -mod(x::X, y::X) where {X <: FixedPoint} = checked_rem(x, y, RoundDown) - function minmax(x::X, y::X) where {X <: FixedPoint} a, b = minmax(reinterpret(x), reinterpret(y)) X(a,0), X(b,0) @@ -518,6 +345,34 @@ include("normed.jl") include("deprecations.jl") const UF = (N0f8, N6f10, N4f12, N2f14, N0f16) +include("arithmetic/arithmetic.jl") +using .FixedPointArithmetic +# re-export +for name in names(FixedPointArithmetic.Wrapping) + startswith(string(name), "wrapping") || continue + @eval export $name +end +for name in names(FixedPointArithmetic.Saturating) + startswith(string(name), "saturating") || continue + @eval export $name +end +for name in names(FixedPointArithmetic.Checked) + startswith(string(name), "checked") || continue + @eval export $name +end + +# construction using the (approximate) intended value, i.e., N0f8 +*(x::Real, ::Type{X}) where {X <: FixedPoint} = _convert(X, x) +Wrapping.wrapping_mul(x::Real, ::Type{X}) where {X <: FixedPoint} = x % X +Saturating.saturating_mul(x::Real, ::Type{X}) where {X <: FixedPoint} = clamp(x, X) +Checked.checked_mul(x::Real, ::Type{X}) where {X <: FixedPoint} = _convert(X, x) + +# type modulus +rem(x::Real, ::Type{X}) where {X <: FixedPoint} = _rem(x, X) +Wrapping.wrapping_rem(x::Real, ::Type{X}) where {X<:FixedPoint} = _rem(x, X) +Saturating.saturating_rem(x::Real, ::Type{X}) where {X<:FixedPoint} = _rem(x, X) +Checked.checked_rem(x::Real, ::Type{X}) where {X<:FixedPoint} = _rem(x, X) + # Promotions promote_rule(::Type{X}, ::Type{Tf}) where {X <: FixedPoint, Tf <: AbstractFloat} = promote_type(floattype(X), Tf) @@ -585,29 +440,6 @@ scaledual(::Type{Tdual}, x::AbstractArray{T}) where {Tdual, T <: FixedPoint} = throw(ArgumentError(String(take!(io)))) end -@noinline function throw_overflowerror(op::Symbol, @nospecialize(x), @nospecialize(y)) - io = IOBuffer() - print(io, x, ' ', op, ' ', y, " overflowed for type ") - showtype(io, typeof(x)) - throw(OverflowError(String(take!(io)))) -end -@noinline function throw_overflowerror_abs(@nospecialize(x)) - io = IOBuffer() - print(io, "abs(", x, ") overflowed for type ") - showtype(io, typeof(x)) - throw(OverflowError(String(take!(io)))) -end -@noinline function throw_overflowerror_div(r::RoundingMode, @nospecialize(x), @nospecialize(y)) - io = IOBuffer() - op = r === RoundUp ? "cld(" : r === RoundDown ? "fld(" : "div(" - print(io, op, x, ", ", y, ") overflowed for type ", rawtype(x)) - throw(OverflowError(String(take!(io)))) -end -@noinline function throw_overflowerror_rem(r::RoundingMode, @nospecialize(x), @nospecialize(y)) - io = IOBuffer() - print(io, "rem(", x, ", ", y, ", ", r, ") overflowed for type ", typeof(x)) - throw(OverflowError(String(take!(io)))) -end function Random.rand(r::AbstractRNG, ::SamplerType{X}) where X <: FixedPoint X(rand(r, rawtype(X)), 0) diff --git a/src/arithmetic/arithmetic.jl b/src/arithmetic/arithmetic.jl new file mode 100644 index 00000000..6eb7c7e7 --- /dev/null +++ b/src/arithmetic/arithmetic.jl @@ -0,0 +1,246 @@ +module FixedPointArithmetic + +import Base: -, +, *, /, abs, div, fld, cld, rem, mod + +import ..FixedPointNumbers +using FixedPointNumbers: FixedPoint +using FixedPointNumbers.Utilities + +export mul_with_rounding + +""" + Wrapping + +Submodule for wrapping arithmetic +""" +module Wrapping + +import ..FixedPointNumbers +using FixedPointNumbers: FixedPoint +using FixedPointNumbers.Utilities + +export wrapping_neg, wrapping_abs, wrapping_add, wrapping_sub, wrapping_mul +export wrapping_div, wrapping_fld, wrapping_cld, wrapping_rem, wrapping_mod +export wrapping_fdiv + +wrapping_neg(x::X) where {X <: FixedPoint} = X(-x.i, 0) +wrapping_abs(x::X) where {X <: FixedPoint} = X(abs(x.i), 0) +wrapping_add(x::X, y::X) where {X <: FixedPoint} = X(x.i + y.i, 0) +wrapping_sub(x::X, y::X) where {X <: FixedPoint} = X(x.i - y.i, 0) +wrapping_mul(x::X, y::X) where {X <: FixedPoint} = (float(x) * float(y)) % X +function wrapping_fdiv(x::X, y::X) where {X <: FixedPoint} + z = floattype(X)(x.i) / floattype(X)(y.i) + isfinite(z) ? z % X : zero(X) +end +function wrapping_div(x::X, y::X, r::RoundingMode=RoundToZero) where {T,X <: FixedPoint{T}} + z = round(floattype(X)(x.i) / floattype(X)(y.i), r) + isfinite(z) || return zero(T) + if T <: Unsigned + _unsafe_trunc(T, z) + else + z > typemax(T) ? typemin(T) : _unsafe_trunc(T, z) + end +end +wrapping_fld(x::X, y::X) where {X <: FixedPoint} = wrapping_div(x, y, RoundDown) +wrapping_cld(x::X, y::X) where {X <: FixedPoint} = wrapping_div(x, y, RoundUp) +wrapping_rem(x::X, y::X, r::RoundingMode=RoundToZero) where {T,X <: FixedPoint{T}} = + X(x.i - wrapping_div(x, y, r) * y.i, 0) +wrapping_mod(x::X, y::X) where {X <: FixedPoint} = wrapping_rem(x, y, RoundDown) + +end # module Wrapping + +""" + Saturating + +Submodule for saturating arithmetic +""" +module Saturating + +import ..FixedPointNumbers +using FixedPointNumbers: FixedPoint +using FixedPointNumbers.Utilities + +export saturating_neg, saturating_abs, saturating_add, saturating_sub, saturating_mul +export saturating_div, saturating_fld, saturating_cld, saturating_rem, saturating_mod +export saturating_fdiv + +saturating_neg(x::X) where {X <: FixedPoint} = X(~min(x.i - true, x.i), 0) +saturating_neg(x::X) where {X <: FixedPoint{<:Unsigned}} = zero(X) + +saturating_abs(x::X) where {X <: FixedPoint} = + X(ifelse(signbit(abs(x.i)), typemax(x.i), abs(x.i)), 0) + +saturating_add(x::X, y::X) where {X <: FixedPoint} = + X(x.i + ifelse(x.i < 0, max(y.i, typemin(x.i) - x.i), min(y.i, typemax(x.i) - x.i)), 0) +saturating_add(x::X, y::X) where {X <: FixedPoint{<:Unsigned}} = X(x.i + min(~x.i, y.i), 0) + +saturating_sub(x::X, y::X) where {X <: FixedPoint} = + X(x.i - ifelse(x.i < 0, min(y.i, x.i - typemin(x.i)), max(y.i, x.i - typemax(x.i))), 0) +saturating_sub(x::X, y::X) where {X <: FixedPoint{<:Unsigned}} = X(x.i - min(x.i, y.i), 0) + +saturating_mul(x::X, y::X) where {X <: FixedPoint} = clamp(float(x) * float(y), X) + +saturating_fdiv(x::X, y::X) where {X <: FixedPoint} = + clamp(floattype(X)(x.i) / floattype(X)(y.i), X) + +function saturating_div(x::X, y::X, r::RoundingMode=RoundToZero) where {T, X <: FixedPoint{T}} + z = round(floattype(X)(x.i) / floattype(X)(y.i), r) + isnan(z) && return zero(T) + if T <: Unsigned + isfinite(z) ? _unsafe_trunc(T, z) : typemax(T) + else + _unsafe_trunc(T, clamp(z, typemin(T), typemax(T))) + end +end +saturating_fld(x::X, y::X) where {X <: FixedPoint} = saturating_div(x, y, RoundDown) +saturating_cld(x::X, y::X) where {X <: FixedPoint} = saturating_div(x, y, RoundUp) +function saturating_rem(x::X, y::X, r::RoundingMode=RoundToZero) where {T, X <: FixedPoint{T}} + T <: Unsigned && r isa RoundingMode{:Up} && return zero(X) + X(x.i - saturating_div(x, y, r) * y.i, 0) +end +saturating_mod(x::X, y::X) where {X <: FixedPoint} = saturating_rem(x, y, RoundDown) + +end # module Saturating + +""" + CheckedArithMetic +""" +module Checked + +import ..FixedPointNumbers +using FixedPointNumbers: FixedPoint, showtype +using FixedPointNumbers.Utilities + +import Base.Checked: checked_neg, checked_abs, checked_add, checked_sub, checked_mul +import Base.Checked: checked_div, checked_fld, checked_cld, checked_rem, checked_mod + +export checked_neg, checked_abs, checked_add, checked_sub, checked_mul +export checked_div, checked_fld, checked_cld, checked_rem, checked_mod +export checked_fdiv + +checked_neg(x::X) where {X <: FixedPoint} = checked_sub(zero(X), x) +function checked_abs(x::X) where {X <: FixedPoint} + abs(x.i) >= 0 || throw_overflowerror_abs(x) + X(abs(x.i), 0) +end +function checked_add(x::X, y::X) where {X <: FixedPoint} + r, f = Base.Checked.add_with_overflow(x.i, y.i) + z = X(r, 0) # store first + f && throw_overflowerror(:+, x, y) + z +end +function checked_sub(x::X, y::X) where {X <: FixedPoint} + r, f = Base.Checked.sub_with_overflow(x.i, y.i) + z = X(r, 0) # store first + f && throw_overflowerror(:-, x, y) + z +end +function checked_mul(x::X, y::X) where {X <: FixedPoint} + z = float(x) * float(y) + typemin(X) - eps(X) / 2 <= z < typemax(X) + eps(X) / 2 || throw_overflowerror(:*, x, y) + z % X +end +function checked_fdiv(x::X, y::X) where {T,X <: FixedPoint{T}} + y === zero(X) && throw(DivideError()) + z = floattype(X)(x.i) / floattype(X)(y.i) + if T <: Unsigned + z < typemax(X) + eps(X) / 2 || throw_overflowerror(:/, x, y) + else + typemin(X) - eps(X) / 2 <= z < typemax(X) + eps(X) / 2 || throw_overflowerror(:/, x, y) + end + z % X +end +function checked_div(x::X, y::X, r::RoundingMode=RoundToZero) where {T, X <: FixedPoint{T}} + y === zero(X) && throw(DivideError()) + z = round(floattype(X)(x.i) / floattype(X)(y.i), r) + if T <: Signed + z <= typemax(T) || throw_overflowerror_div(r, x, y) + end + _unsafe_trunc(T, z) +end +checked_fld(x::X, y::X) where {X <: FixedPoint} = checked_div(x, y, RoundDown) +checked_cld(x::X, y::X) where {X <: FixedPoint} = checked_div(x, y, RoundUp) +function checked_rem(x::X, y::X, r::RoundingMode=RoundToZero) where {T, X <: FixedPoint{T}} + y === zero(X) && throw(DivideError()) + fx, fy = floattype(X)(x.i), floattype(X)(y.i) + z = fx - round(fx / fy, r) * fy + if T <: Unsigned && r isa RoundingMode{:Up} + z >= zero(z) || throw_overflowerror_rem(r, x, y) + end + X(_unsafe_trunc(T, z), 0) +end +checked_mod(x::X, y::X) where {X <: FixedPoint} = checked_rem(x, y, RoundDown) + +@noinline function throw_overflowerror(op::Symbol, @nospecialize(x), @nospecialize(y)) + io = IOBuffer() + print(io, x, ' ', op, ' ', y, " overflowed for type ") + showtype(io, typeof(x)) + throw(OverflowError(String(take!(io)))) +end +@noinline function throw_overflowerror_abs(@nospecialize(x)) + io = IOBuffer() + print(io, "abs(", x, ") overflowed for type ") + showtype(io, typeof(x)) + throw(OverflowError(String(take!(io)))) +end +@noinline function throw_overflowerror_div(r::RoundingMode, @nospecialize(x), @nospecialize(y)) + io = IOBuffer() + op = r === RoundUp ? "cld(" : r === RoundDown ? "fld(" : "div(" + print(io, op, x, ", ", y, ") overflowed for type ", rawtype(x)) + throw(OverflowError(String(take!(io)))) +end +@noinline function throw_overflowerror_rem(r::RoundingMode, @nospecialize(x), @nospecialize(y)) + io = IOBuffer() + print(io, "rem(", x, ", ", y, ", ", r, ") overflowed for type ", typeof(x)) + throw(OverflowError(String(take!(io)))) +end + +end # module Checked + +using .Wrapping +using .Saturating +using .Checked + +using .Checked: throw_overflowerror + +# re-export +for name in names(Wrapping) + @eval export $name +end +for name in names(Saturating) + @eval export $name +end +for name in names(Checked) + @eval export $name +end + +include("normed_arithmetic.jl") +include("fixed_arithmetic.jl") + +# default arithmetic +const DEFAULT_ARITHMETIC = :wrapping + +for (op, name) in ((:-, :neg), (:abs, :abs)) + f = Symbol(DEFAULT_ARITHMETIC, :_, name) + @eval begin + $op(x::X) where {X <: FixedPoint} = $f(x) + end +end +for (op, name) in ((:+, :add), (:-, :sub), (:*, :mul)) + f = Symbol(DEFAULT_ARITHMETIC, :_, name) + @eval begin + $op(x::X, y::X) where {X <: FixedPoint} = $f(x, y) + end +end + +# force checked arithmetic +/(x::X, y::X) where {X <: FixedPoint} = checked_fdiv(x, y) +div(x::X, y::X, r::RoundingMode=RoundToZero) where {X <: FixedPoint} = checked_div(x, y, r) +fld(x::X, y::X) where {X <: FixedPoint} = checked_div(x, y, RoundDown) +cld(x::X, y::X) where {X <: FixedPoint} = checked_div(x, y, RoundUp) +rem(x::X, y::X) where {X <: FixedPoint} = checked_rem(x, y, RoundToZero) +rem(x::X, y::X, ::RoundingMode{:Down}) where {X <: FixedPoint} = checked_rem(x, y, RoundDown) +rem(x::X, y::X, ::RoundingMode{:Up}) where {X <: FixedPoint} = checked_rem(x, y, RoundUp) +mod(x::X, y::X) where {X <: FixedPoint} = checked_rem(x, y, RoundDown) + +end # module FixedPointArithmetic diff --git a/src/arithmetic/fixed_arithmetic.jl b/src/arithmetic/fixed_arithmetic.jl new file mode 100644 index 00000000..eb7a9b82 --- /dev/null +++ b/src/arithmetic/fixed_arithmetic.jl @@ -0,0 +1,48 @@ +using FixedPointNumbers: Fixed +import .Wrapping: wrapping_mul +import .Saturating: saturating_mul +import .Checked: checked_mul + +# wrapping arithmetic +function wrapping_mul(x::F, y::F) where {T <: Union{Int8,Int16,Int32,Int64}, f, F <: Fixed{T,f}} + z = widemul(x.i, y.i) + F(div_2f(z, Val(Int(f))) % T, 0) +end + +# saturating arithmetic +function saturating_mul(x::F, y::F) where {T <: Union{Int8,Int16,Int32,Int64}, f, F <: Fixed{T,f}} + if f >= bitwidth(T) - 1 + z = min(widemul(x.i, y.i), widen1(typemax(T)) << f) + else + z = clamp(widemul(x.i, y.i), widen1(typemin(T)) << f, widen1(typemax(T)) << f) + end + F(div_2f(z, Val(Int(f))) % T, 0) +end + +# checked arithmetic +function checked_mul(x::F, y::F) where {T <: Union{Int8,Int16,Int32,Int64}, f, F <: Fixed{T,f}} + z = widemul(x.i, y.i) + if f < 1 + m = widen1(typemax(T)) + 0x1 + n = widen1(typemin(T)) + else + half = widen1(oneunit(T)) << (f - 1) + m = widen1(typemax(T)) << f + half + n = widen1(typemin(T)) << f - half + end + (n <= z) & (z < m) || throw_overflowerror(:*, x, y) + F(div_2f(z, Val(Int(f))) % T, 0) +end + +function mul_with_rounding(x::F, y::F, ::RoundingMode{:Nearest}) where {F <: Fixed} + wrapping_mul(x, y) +end +function mul_with_rounding(x::F, y::F, ::RoundingMode{:NearestTiesUp}) where +{T<:Union{Int8,Int16,Int32,Int64},f,F <: Fixed{T,f}} + z = widemul(x.i, y.i) + F(((z + (oftype(z, 1) << f >>> 1)) >> f) % T, 0) +end +function mul_with_rounding(x::F, y::F, ::RoundingMode{:Down}) where +{T<:Union{Int8,Int16,Int32,Int64},f,F <: Fixed{T,f}} + F((widemul(x.i, y.i) >> f) % T, 0) +end diff --git a/src/arithmetic/normed_arithmetic.jl b/src/arithmetic/normed_arithmetic.jl new file mode 100644 index 00000000..0cf1fe52 --- /dev/null +++ b/src/arithmetic/normed_arithmetic.jl @@ -0,0 +1,31 @@ +using FixedPointNumbers: Normed +import .Wrapping: wrapping_mul +import .Saturating: saturating_mul +import .Checked: checked_mul + +# wrapping arithmetic +function wrapping_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} + z = widemul(x.i, y.i) + N(div_2fm1(z, Val(Int(f))) % T, 0) +end + +# saturating arithmetic +function saturating_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} + f == bitwidth(T) && return wrapping_mul(x, y) + z = min(widemul(x.i, y.i), widemul(typemax(N).i, rawone(N))) + N(div_2fm1(z, Val(Int(f))) % T, 0) +end + +# checked arithmetic +function checked_mul(x::N, y::N) where {N <: Normed} + z = float(x) * float(y) + z < typemax(N) + eps(N) / 2 || throw_overflowerror(:*, x, y) + z % N +end +function checked_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} + f == bitwidth(T) && return wrapping_mul(x, y) + z = widemul(x.i, y.i) + m = widemul(typemax(N).i, rawone(N)) + (rawone(N) >> 0x1) + z < m || throw_overflowerror(:*, x, y) + N(div_2fm1(z, Val(Int(f))) % T, 0) +end diff --git a/src/fixed.jl b/src/fixed.jl index 1abd7ff4..8a5460ff 100644 --- a/src/fixed.jl +++ b/src/fixed.jl @@ -127,59 +127,6 @@ function Base.Rational(x::Fixed{T,f}) where {T, f} f < bitwidth(T)-1 ? x.i//rawone(x) : x.i//(one(widen1(T))<>> (bitwidth(T) - f - 1)) - half = oneunit(T) << (f - 1) - c = half - (xf === half) - (x + c) >> f -end -div_2f(x::T, ::Val{0}) where {T} = x - -# wrapping arithmetic -function wrapping_mul(x::F, y::F) where {T <: Union{Int8,Int16,Int32,Int64}, f, F <: Fixed{T,f}} - z = widemul(x.i, y.i) - F(div_2f(z, Val(Int(f))) % T, 0) -end - -# saturating arithmetic -function saturating_mul(x::F, y::F) where {T <: Union{Int8,Int16,Int32,Int64}, f, F <: Fixed{T,f}} - if f >= bitwidth(T) - 1 - z = min(widemul(x.i, y.i), widen1(typemax(T)) << f) - else - z = clamp(widemul(x.i, y.i), widen1(typemin(T)) << f, widen1(typemax(T)) << f) - end - F(div_2f(z, Val(Int(f))) % T, 0) -end - -# checked arithmetic -function checked_mul(x::F, y::F) where {T <: Union{Int8,Int16,Int32,Int64}, f, F <: Fixed{T,f}} - z = widemul(x.i, y.i) - if f < 1 - m = widen1(typemax(T)) + 0x1 - n = widen1(typemin(T)) - else - half = widen1(oneunit(T)) << (f - 1) - m = widen1(typemax(T)) << f + half - n = widen1(typemin(T)) << f - half - end - (n <= z) & (z < m) || throw_overflowerror(:*, x, y) - F(div_2f(z, Val(Int(f))) % T, 0) -end - -function mul_with_rounding(x::F, y::F, ::RoundingMode{:Nearest}) where {F <: Fixed} - wrapping_mul(x, y) -end -function mul_with_rounding(x::F, y::F, ::RoundingMode{:NearestTiesUp}) where - {T <: Union{Int8,Int16,Int32,Int64}, f, F <: Fixed{T, f}} - z = widemul(x.i, y.i) - F(((z + (oftype(z, 1) << f >>> 1)) >> f) % T, 0) -end -function mul_with_rounding(x::F, y::F, ::RoundingMode{:Down}) where - {T <: Union{Int8,Int16,Int32,Int64}, f, F <: Fixed{T, f}} - F((widemul(x.i, y.i) >> f) % T, 0) -end - - function trunc(x::Fixed{T,f}) where {T, f} f == 0 && return x f == bitwidth(T) && return zero(x) # TODO: remove this line diff --git a/src/normed.jl b/src/normed.jl index a8fae420..c9a766f6 100644 --- a/src/normed.jl +++ b/src/normed.jl @@ -253,41 +253,6 @@ Base.BigFloat(x::Normed) = reinterpret(x) / BigFloat(rawone(x)) Base.Rational(x::Normed) = reinterpret(x)//rawone(x) -# Division by `2^f-1` with RoundNearest. The result would be in the lower half bits. -div_2fm1(x::T, ::Val{f}) where {T, f} = (x + (T(1)<<(f - 1) - 0x1)) ÷ (T(1) << f - 0x1) -div_2fm1(x::T, ::Val{1}) where T = x -div_2fm1(x::UInt16, ::Val{8}) = (((x + 0x80) >> 0x8) + x + 0x80) >> 0x8 -div_2fm1(x::UInt32, ::Val{16}) = (((x + 0x8000) >> 0x10) + x + 0x8000) >> 0x10 -div_2fm1(x::UInt64, ::Val{32}) = (((x + 0x80000000) >> 0x20) + x + 0x80000000) >> 0x20 -div_2fm1(x::UInt128, ::Val{64}) = (((x + 0x8000000000000000) >> 0x40) + x + 0x8000000000000000) >> 0x40 - -# wrapping arithmetic -function wrapping_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} - z = widemul(x.i, y.i) - N(div_2fm1(z, Val(Int(f))) % T, 0) -end - -# saturating arithmetic -function saturating_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} - f == bitwidth(T) && return wrapping_mul(x, y) - z = min(widemul(x.i, y.i), widemul(typemax(N).i, rawone(N))) - N(div_2fm1(z, Val(Int(f))) % T, 0) -end - -# checked arithmetic -function checked_mul(x::N, y::N) where {N <: Normed} - z = float(x) * float(y) - z < typemax(N) + eps(N)/2 || throw_overflowerror(:*, x, y) - z % N -end -function checked_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} - f == bitwidth(T) && return wrapping_mul(x, y) - z = widemul(x.i, y.i) - m = widemul(typemax(N).i, rawone(N)) + (rawone(N) >> 0x1) - z < m || throw_overflowerror(:*, x, y) - N(div_2fm1(z, Val(Int(f))) % T, 0) -end - # Functions floor(x::N) where {N <: Normed} = reinterpret(N, x.i - x.i % rawone(N)) diff --git a/src/utilities.jl b/src/utilities.jl index 29573548..049daa1d 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -1,4 +1,35 @@ -# utility functions and macros, which are independent of `FixedPoint` +""" +Module for utility functions and macros, which are independent of `FixedPoint`. +This also includes the declaration of public or internal APIs. +""" +module Utilities + +export ShortInts +export LongInts +export ShorterThanInt +export NotBiggerThanInt +export NotBiggerThanInt64 +export SShorterThanInt +export UShorterThanInt + +export bitwidth, widen1, signedtype, wrapper +export @f32, @exp2, significand_bits, exponent_bias +export _unsafe_trunc +export div_2f, div_2fm1 + +export floattype, rawone, inv_rawone, nbitsfrac, rawtype, signbits, nbitsint +export scaledual + + +const ShortInts = Union{Int8, UInt8, Int16, UInt16} +const LongInts = Union{Int64, UInt64, Int128, UInt128, BigInt} + +const ShorterThanInt = Int === Int32 ? ShortInts : Union{ShortInts, Int32, UInt32} +const NotBiggerThanInt = Union{ShorterThanInt, Int, UInt} +const NotBiggerThanInt64 = Union{ShortInts, Int32, UInt32, Int64, UInt64} +const SShorterThanInt = typeintersect(ShorterThanInt, Signed) +const UShorterThanInt = typeintersect(ShorterThanInt, Unsigned) + bitwidth(T::Type) = 8sizeof(T) widen1(T::Type) = T # fallback @@ -14,14 +45,7 @@ widen1(x::Integer) = x % widen1(typeof(x)) signedtype(::Type{T}) where {T <: Integer} = typeof(signed(zero(T))) -const ShortInts = Union{Int8, UInt8, Int16, UInt16} -const LongInts = Union{Int64, UInt64, Int128, UInt128, BigInt} - -const ShorterThanInt = Int === Int32 ? ShortInts : Union{ShortInts, Int32, UInt32} -const NotBiggerThanInt = Union{ShorterThanInt, Int, UInt} -const NotBiggerThanInt64 = Union{ShortInts, Int32, UInt32, Int64, UInt64} -const SShorterThanInt = typeintersect(ShorterThanInt, Signed) -const UShorterThanInt = typeintersect(ShorterThanInt, Unsigned) +wrapper(@nospecialize(T)) = Base.typename(T).wrapper macro f32(x::Float64) # just for hexadecimal floating-point literals :(Float32($x)) @@ -53,4 +77,46 @@ function _unsafe_trunc(::Type{T}, x::AbstractFloat) where {T <: Integer} end end -wrapper(@nospecialize(T)) = Base.typename(T).wrapper +# Division by `2^f` with RoundNearest. +function div_2f(x::T, ::Val{f}) where {T,f} + xf = x & (T(-1) >>> (bitwidth(T) - f - 1)) + half = oneunit(T) << (f - 1) + c = half - (xf === half) + (x + c) >> f +end +div_2f(x::T, ::Val{0}) where {T} = x + +# Division by `2^f-1` with RoundNearest. The result would be in the lower half bits. +div_2fm1(x::T, ::Val{f}) where {T,f} = (x + (T(1) << (f - 1) - 0x1)) ÷ (T(1) << f - 0x1) +div_2fm1(x::T, ::Val{1}) where {T} = x +div_2fm1(x::UInt16, ::Val{8}) = (((x + 0x80) >> 0x8) + x + 0x80) >> 0x8 +div_2fm1(x::UInt32, ::Val{16}) = (((x + 0x8000) >> 0x10) + x + 0x8000) >> 0x10 +div_2fm1(x::UInt64, ::Val{32}) = (((x + 0x80000000) >> 0x20) + x + 0x80000000) >> 0x20 +div_2fm1(x::UInt128, ::Val{64}) = (((x + 0x8000000000000000) >> 0x40) + x + 0x8000000000000000) >> 0x40 + + + +floattype(::Type{T}) where {T<:AbstractFloat} = T # fallback (we want a MethodError if no method producing AbstractFloat is defined) +floattype(::Type{T}) where {T<:Union{ShortInts,Bool}} = Float32 +floattype(::Type{T}) where {T<:Integer} = Float64 +floattype(::Type{T}) where {T<:LongInts} = BigFloat +floattype(::Type{T}) where {I<:Integer,T<:Rational{I}} = typeof(zero(I) / oneunit(I)) +floattype(::Type{<:AbstractIrrational}) = Float64 +# Non-Real types +floattype(::Type{Complex{T}}) where {T} = Complex{floattype(T)} +floattype(::Type{Base.TwicePrecision{Float64}}) = Float64 # wider would be nice, but hardware support is paramount +floattype(::Type{Base.TwicePrecision{T}}) where {T<:Union{Float16,Float32}} = widen(T) + +function rawone end + +# for Julia v1.0, which does not fold `div_float` before inlining +inv_rawone(x) = (@generated) ? (y = 1.0 / rawone(x); :($y)) : 1.0 / rawone(x) + +function nbitsfrac end +function rawtype end +function signbits end +function nbitsint end + +function scaledual end + +end # module Utilities