Skip to content

Commit 0be45d8

Browse files
committed
dec2flt: Refactor float traits
1 parent 045c989 commit 0be45d8

File tree

4 files changed

+163
-95
lines changed

4 files changed

+163
-95
lines changed

library/core/src/num/dec2flt/float.rs

+157-89
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,56 @@
11
//! Helper trait for generic float types.
22
3+
use core::f64;
4+
35
use crate::fmt::{Debug, LowerExp};
46
use crate::num::FpCategory;
5-
use crate::ops::{Add, Div, Mul, Neg};
7+
use crate::ops::{self, Add, Div, Mul, Neg};
8+
9+
pub trait CastInto<T: Copy>: Copy {
10+
fn cast(self) -> T;
11+
}
12+
13+
pub trait Integer:
14+
Sized
15+
+ Clone
16+
+ Copy
17+
+ Debug
18+
+ ops::Shr<u32, Output = Self>
19+
+ ops::Shl<u32, Output = Self>
20+
+ ops::BitAnd<Output = Self>
21+
+ ops::BitOr<Output = Self>
22+
+ PartialEq
23+
+ CastInto<i16>
24+
{
25+
const ZERO: Self;
26+
const ONE: Self;
27+
}
628

7-
/// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
29+
macro_rules! int {
30+
($($ty:ty),+) => {
31+
$(
32+
impl CastInto<i16> for $ty {
33+
fn cast(self) -> i16 {
34+
self as i16
35+
}
36+
}
37+
38+
39+
impl Integer for $ty {
40+
const ZERO: Self = 0;
41+
const ONE: Self = 1;
42+
}
43+
)+
44+
}
45+
}
46+
47+
int!(u16, u32, u64);
48+
49+
/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
850
///
951
/// See the parent module's doc comment for why this is necessary.
1052
///
11-
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
53+
/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
1254
#[doc(hidden)]
1355
pub trait RawFloat:
1456
Sized
@@ -24,62 +66,91 @@ pub trait RawFloat:
2466
+ Copy
2567
+ Debug
2668
{
69+
/// The unsigned integer with the same size as the float
70+
type Int: Integer + Into<u64>;
71+
72+
/* general constants */
73+
2774
const INFINITY: Self;
2875
const NEG_INFINITY: Self;
2976
const NAN: Self;
3077
const NEG_NAN: Self;
3178

79+
/// Bit width of the float
80+
const BITS: u32;
81+
82+
/// Mantissa digits including the hidden bit (provided by core)
83+
const MANTISSA_BITS: u32;
84+
85+
const EXPONENT_MASK: Self::Int;
86+
const MANTISSA_MASK: Self::Int;
87+
3288
/// The number of bits in the significand, *excluding* the hidden bit.
33-
const MANTISSA_EXPLICIT_BITS: usize;
34-
35-
// Round-to-even only happens for negative values of q
36-
// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
37-
// the 32-bitcase.
38-
//
39-
// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
40-
// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
41-
// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
42-
//
43-
// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
44-
// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
45-
// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
46-
// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
47-
// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
48-
//
49-
// Thus we have that we only need to round ties to even when
50-
// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
51-
// (in the 32-bit case). In both cases,the power of five(5^|q|)
52-
// fits in a 64-bit word.
89+
const MANTISSA_EXPLICIT_BITS: u32 = Self::MANTISSA_BITS - 1;
90+
91+
/// Bits for the exponent
92+
const EXPONENT_BITS: u32 = Self::BITS - Self::MANTISSA_EXPLICIT_BITS - 1;
93+
94+
/// Maximum exponent for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
95+
// assuming FLT_EVAL_METHOD = 0
96+
const MAX_EXPONENT_FAST_PATH: i64 =
97+
((Self::MANTISSA_BITS as f64) / (f64::consts::LOG2_10 - 1.0)) as i64;
98+
99+
/// Minimum exponent for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
100+
const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
101+
102+
/// Round-to-even only happens for negative values of q
103+
/// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
104+
/// the 32-bit case.
105+
///
106+
/// When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
107+
/// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
108+
/// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
109+
///
110+
/// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
111+
/// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
112+
/// or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
113+
/// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
114+
/// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
115+
///
116+
/// Thus we have that we only need to round ties to even when
117+
/// we have that q ∈ [−4, 23] (in the 64-bit case) or q ∈ [−17, 10]
118+
/// (in the 32-bit case). In both cases, the power of five (5^|q|)
119+
/// fits in a 64-bit word.
53120
const MIN_EXPONENT_ROUND_TO_EVEN: i32;
54121
const MAX_EXPONENT_ROUND_TO_EVEN: i32;
55122

56-
// Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
57-
const MIN_EXPONENT_FAST_PATH: i64;
123+
/// Minimum exponent value `-(1 << (EXPONENT_BITS - 1)) + 1`.
124+
const MINIMUM_EXPONENT: i32 = -(1 << (Self::EXPONENT_BITS - 1)) + 1;
58125

59-
// Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
60-
const MAX_EXPONENT_FAST_PATH: i64;
126+
/// Maximum (all-ones) biased exponent field, `(1 << EXPONENT_BITS) - 1`; this is the value used by infinities and NaNs
127+
const MAXIMUM_EXPONENT: u32 = (1 << Self::EXPONENT_BITS) - 1;
61128

62-
// Maximum exponent that can be represented for a disguised-fast path case.
63-
// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
64-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64;
129+
/// The exponent bias value
130+
const EXPONENT_BIAS: u32 = Self::MAXIMUM_EXPONENT >> 1;
65131

66-
// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
67-
const MINIMUM_EXPONENT: i32;
132+
/// Largest exponent value `(1 << EXPONENT_BITS) - 1`.
133+
const INFINITE_POWER: i32 = (1 << Self::EXPONENT_BITS) - 1;
68134

69-
// Largest exponent value `(1 << EXP_BITS) - 1`.
70-
const INFINITE_POWER: i32;
135+
/// Largest decimal exponent for a non-infinite value.
136+
///
137+
/// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
138+
/// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
139+
const LARGEST_POWER_OF_TEN: i32 =
140+
((Self::EXPONENT_BIAS as f64 + 1.0) / f64::consts::LOG2_10) as i32;
71141

72-
// Index (in bits) of the sign.
73-
const SIGN_INDEX: usize;
142+
/// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
143+
/// smaller than `10^SMALLEST_POWER_OF_TEN`.
144+
const SMALLEST_POWER_OF_TEN: i32 =
145+
-(((Self::EXPONENT_BIAS + Self::MANTISSA_BITS + 64) as f64) / f64::consts::LOG2_10) as i32;
74146

75-
// Smallest decimal exponent for a non-zero value.
76-
const SMALLEST_POWER_OF_TEN: i32;
147+
/// Maximum exponent that can be represented for a disguised-fast path case.
148+
/// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
149+
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
150+
Self::MAX_EXPONENT_FAST_PATH + (Self::MANTISSA_BITS as f64 / f64::consts::LOG2_10) as i64;
77151

78-
// Largest decimal exponent for a non-infinite value.
79-
const LARGEST_POWER_OF_TEN: i32;
80-
81-
// Maximum mantissa for the fast-path (`1 << 53` for f64).
82-
const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS;
152+
/// Maximum mantissa for the fast-path (`1 << 53` for f64).
153+
const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::MANTISSA_BITS;
83154

84155
/// Converts integer into float through an as cast.
85156
/// This is only called in the fast-path algorithm, and therefore
@@ -96,27 +167,45 @@ pub trait RawFloat:
96167
/// Returns the category that this number falls into.
97168
fn classify(self) -> FpCategory;
98169

170+
/// Transmute to the integer representation
171+
fn to_bits(self) -> Self::Int;
172+
99173
/// Returns the mantissa, exponent and sign as integers.
100-
fn integer_decode(self) -> (u64, i16, i8);
174+
///
175+
/// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
176+
/// For 0, the exponent will be `-(EXPONENT_BIAS + MANTISSA_EXPLICIT_BITS)`, which is the
177+
/// minimum subnormal power.
178+
fn integer_decode(self) -> (u64, i16, i8) {
179+
let bits = self.to_bits();
180+
let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
181+
let mut exponent: i16 =
182+
((bits & Self::EXPONENT_MASK) >> Self::MANTISSA_EXPLICIT_BITS).cast();
183+
let mantissa = if exponent == 0 {
184+
(bits & Self::MANTISSA_MASK) << 1
185+
} else {
186+
(bits & Self::MANTISSA_MASK) | (Self::Int::ONE << Self::MANTISSA_EXPLICIT_BITS)
187+
};
188+
// Exponent bias + mantissa shift
189+
exponent -= (Self::EXPONENT_BIAS + Self::MANTISSA_EXPLICIT_BITS) as i16;
190+
(mantissa.into(), exponent, sign)
191+
}
101192
}
102193

103194
impl RawFloat for f32 {
195+
type Int = u32;
196+
104197
const INFINITY: Self = f32::INFINITY;
105198
const NEG_INFINITY: Self = f32::NEG_INFINITY;
106199
const NAN: Self = f32::NAN;
107200
const NEG_NAN: Self = -f32::NAN;
108201

109-
const MANTISSA_EXPLICIT_BITS: usize = 23;
202+
const BITS: u32 = 32;
203+
const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
204+
const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
205+
const MANTISSA_MASK: Self::Int = Self::MAN_MASK;
206+
110207
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
111208
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
112-
const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
113-
const MAX_EXPONENT_FAST_PATH: i64 = 10;
114-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
115-
const MINIMUM_EXPONENT: i32 = -127;
116-
const INFINITE_POWER: i32 = 0xFF;
117-
const SIGN_INDEX: usize = 31;
118-
const SMALLEST_POWER_OF_TEN: i32 = -65;
119-
const LARGEST_POWER_OF_TEN: i32 = 38;
120209

121210
#[inline]
122211
fn from_u64(v: u64) -> Self {
@@ -136,16 +225,8 @@ impl RawFloat for f32 {
136225
TABLE[exponent & 15]
137226
}
138227

139-
/// Returns the mantissa, exponent and sign as integers.
140-
fn integer_decode(self) -> (u64, i16, i8) {
141-
let bits = self.to_bits();
142-
let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
143-
let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
144-
let mantissa =
145-
if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
146-
// Exponent bias + mantissa shift
147-
exponent -= 127 + 23;
148-
(mantissa as u64, exponent, sign)
228+
fn to_bits(self) -> Self::Int {
229+
self.to_bits()
149230
}
150231

151232
fn classify(self) -> FpCategory {
@@ -154,22 +235,20 @@ impl RawFloat for f32 {
154235
}
155236

156237
impl RawFloat for f64 {
157-
const INFINITY: Self = f64::INFINITY;
158-
const NEG_INFINITY: Self = f64::NEG_INFINITY;
159-
const NAN: Self = f64::NAN;
160-
const NEG_NAN: Self = -f64::NAN;
238+
type Int = u64;
239+
240+
const INFINITY: Self = Self::INFINITY;
241+
const NEG_INFINITY: Self = Self::NEG_INFINITY;
242+
const NAN: Self = Self::NAN;
243+
const NEG_NAN: Self = -Self::NAN;
244+
245+
const BITS: u32 = 64;
246+
const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
247+
const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
248+
const MANTISSA_MASK: Self::Int = Self::MAN_MASK;
161249

162-
const MANTISSA_EXPLICIT_BITS: usize = 52;
163250
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
164251
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
165-
const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
166-
const MAX_EXPONENT_FAST_PATH: i64 = 22;
167-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
168-
const MINIMUM_EXPONENT: i32 = -1023;
169-
const INFINITE_POWER: i32 = 0x7FF;
170-
const SIGN_INDEX: usize = 63;
171-
const SMALLEST_POWER_OF_TEN: i32 = -342;
172-
const LARGEST_POWER_OF_TEN: i32 = 308;
173252

174253
#[inline]
175254
fn from_u64(v: u64) -> Self {
@@ -190,19 +269,8 @@ impl RawFloat for f64 {
190269
TABLE[exponent & 31]
191270
}
192271

193-
/// Returns the mantissa, exponent and sign as integers.
194-
fn integer_decode(self) -> (u64, i16, i8) {
195-
let bits = self.to_bits();
196-
let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
197-
let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
198-
let mantissa = if exponent == 0 {
199-
(bits & 0xfffffffffffff) << 1
200-
} else {
201-
(bits & 0xfffffffffffff) | 0x10000000000000
202-
};
203-
// Exponent bias + mantissa shift
204-
exponent -= 1023 + 52;
205-
(mantissa, exponent, sign)
272+
fn to_bits(self) -> Self::Int {
273+
self.to_bits()
206274
}
207275

208276
fn classify(self) -> FpCategory {

library/core/src/num/dec2flt/lemire.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
3838
// Normalize our significant digits, so the most-significant bit is set.
3939
let lz = w.leading_zeros();
4040
w <<= lz;
41-
let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3);
41+
let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS as usize + 3);
4242
if lo == 0xFFFF_FFFF_FFFF_FFFF {
4343
// If we have failed to approximate w x 5^-q with our 128-bit value.
4444
// Since the addition of 1 could lead to an overflow which could then
@@ -89,7 +89,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
8989
if lo <= 1
9090
&& q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64
9191
&& q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64
92-
&& mantissa & 3 == 1
92+
&& mantissa & 0b11 == 0b01
9393
&& (mantissa << (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3)) == hi
9494
{
9595
// Zero the lowest bit, so we don't round up.

library/core/src/num/dec2flt/slow.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp {
8787
}
8888
// Shift the decimal to the hidden bit, and then round the value
8989
// to get the high mantissa+1 bits.
90-
d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1);
90+
d.left_shift(F::MANTISSA_EXPLICIT_BITS as usize + 1);
9191
let mut mantissa = d.round();
9292
if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) {
9393
// Rounding up overflowed to the carry bit, need to

src/etc/test-float-parse/src/traits.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,12 @@ pub trait Float:
147147
}
148148

149149
macro_rules! impl_float {
150-
($($fty:ty, $ity:ty, $bits:literal);+) => {
150+
($($fty:ty, $ity:ty);+) => {
151151
$(
152152
impl Float for $fty {
153153
type Int = $ity;
154154
type SInt = <Self::Int as Int>::Signed;
155-
const BITS: u32 = $bits;
155+
const BITS: u32 = <$ity>::BITS;
156156
const MAN_BITS: u32 = Self::MANTISSA_DIGITS - 1;
157157
const MAN_MASK: Self::Int = (Self::Int::ONE << Self::MAN_BITS) - Self::Int::ONE;
158158
const SIGN_MASK: Self::Int = Self::Int::ONE << (Self::BITS-1);
@@ -168,7 +168,7 @@ macro_rules! impl_float {
168168
}
169169
}
170170

171-
impl_float!(f32, u32, 32; f64, u64, 64);
171+
impl_float!(f32, u32; f64, u64);
172172

173173
/// A test generator. Should provide an iterator that produces unique patterns to parse.
174174
///

0 commit comments

Comments
 (0)