Commit ba792a4

Author: Robin Kruppe (committed)

Accurate decimal-to-float parsing routines.

This commit primarily adds implementations of the algorithms from William Clinger's paper "How to Read Floating Point Numbers Accurately". It also includes a lot of infrastructure necessary for those algorithms, and some unit tests. Since these algorithms reject a few (extreme) inputs that were previously accepted, this could be seen as a [breaking-change].

1 parent b7e39a1 · commit ba792a4

File tree

13 files changed: +2787 −15 lines


src/libcore/num/dec2flt/algorithm.rs

+353
@@ -0,0 +1,353 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! The various algorithms from the paper.

use num::flt2dec::strategy::grisu::Fp;
use prelude::v1::*;
use cmp::min;
use cmp::Ordering::{Less, Equal, Greater};
use super::table;
use super::rawfp::{self, Unpacked, RawFloat, fp_to_float, next_float, prev_float};
use super::num::{self, Big};

/// Number of significand bits in Fp
const P: u32 = 64;

// We simply store the best approximation for *all* exponents, so
// the variable "h" and the associated conditions can be omitted.
// This trades performance for space (11 KiB versus... 5 KiB or so?)

fn power_of_ten(e: i16) -> Fp {
    assert!(e >= table::MIN_E);
    let i = e - table::MIN_E;
    let sig = table::POWERS.0[i as usize];
    let exp = table::POWERS.1[i as usize];
    Fp { f: sig, e: exp }
}

/// The fast path of Bellerophon using machine-sized integers and floats.
///
/// This is extracted into a separate function so that it can be attempted before constructing
/// a bignum.
pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Option<T> {
    let num_digits = integral.len() + fractional.len();
    // log_10(f64::max_sig) ~ 15.95. We compare the exact value to max_sig near the end;
    // this is just a quick, cheap rejection (and also frees the rest of the code from
    // worrying about underflow).
    if num_digits > 16 {
        return None;
    }
    if e.abs() >= T::ceil_log5_of_max_sig() as i64 {
        return None;
    }
    let f = num::from_str_unchecked(integral.iter().chain(fractional.iter()));
    if f > T::max_sig() {
        return None;
    }
    let e = e as i16; // Can't overflow because e.abs() <= LOG5_OF_EXP_N
    // The case e < 0 cannot be folded into the other branch. Negative powers result in
    // a repeating fractional part in binary, which is rounded, and that rounding causes
    // real (and occasionally quite significant!) errors in the final result.
    // The case `e == 0`, however, is unnecessary for correctness. It's just measurably faster.
    if e == 0 {
        Some(T::from_int(f))
    } else if e > 0 {
        Some(T::from_int(f) * fp_to_float(power_of_ten(e)))
    } else {
        Some(T::from_int(f) / fp_to_float(power_of_ten(-e)))
    }
}

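// Illustrative, standalone sketch of the fast-path idea above, using only plain f64
// arithmetic (none of this module's internals): when the digits fit the significand
// exactly and 10^|e| is also exactly representable, a single multiplication or division
// is correctly rounded, so only one rounding happens overall. The bounds and demo inputs
// here are specific to f64 and are not taken from the RawFloat trait.
fn fast_path_sketch(digits: u64, e: i32) -> Option<f64> {
    // Integers up to 2^53 convert to f64 without rounding.
    if digits > (1u64 << 53) {
        return None;
    }
    // 10^|e| is exactly representable as long as |e| <= 22.
    if e.abs() > 22 {
        return None;
    }
    let f = digits as f64;  // exact
    let mut pow10 = 1.0f64; // build 10^|e| by exact multiplications
    for _ in 0..e.abs() {
        pow10 *= 10.0;
    }
    Some(if e >= 0 { f * pow10 } else { f / pow10 })
}
// e.g. fast_path_sketch(15, 2) == Some(1500.0), computed exactly.
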
/// Algorithm Bellerophon is trivial code justified by non-trivial numeric analysis.
///
/// It rounds ``f`` to a float with 64-bit significand and multiplies it by the best approximation
/// of `10^e` (in the same floating point format). This is often enough to get the correct result.
/// However, when the result is close to halfway between two adjacent (ordinary) floats, the
/// compound rounding error from multiplying two approximations means the result may be off by a
/// few bits. When this happens, the iterative Algorithm R fixes things up.
///
/// The hand-wavy "close to halfway" is made precise by the numeric analysis in the paper.
/// In the words of Clinger:
///
/// > Slop, expressed in units of the least significant bit, is an inclusive bound for the error
/// > accumulated during the floating point calculation of the approximation to f * 10^e. (Slop is
/// > not a bound for the true error, but bounds the difference between the approximation z and
/// > the best possible approximation that uses p bits of significand.)
pub fn bellerophon<T: RawFloat>(f: &Big, e: i16) -> T {
    let slop;
    if f <= &Big::from_u64(T::max_sig()) {
        // The cases abs(e) < log5(2^N) are in fast_path()
        slop = if e >= 0 { 0 } else { 3 };
    } else {
        slop = if e >= 0 { 1 } else { 4 };
    }
    let z = rawfp::big_to_fp(f).mul(&power_of_ten(e)).normalize();
    let exp_p_n = 1 << (P - T::sig_bits() as u32);
    let lowbits: i64 = (z.f % exp_p_n) as i64;
    // Is the slop large enough to make a difference when
    // rounding to n bits?
    if (lowbits - exp_p_n as i64 / 2).abs() <= slop {
        algorithm_r(f, e, fp_to_float(z))
    } else {
        fp_to_float(z)
    }
}

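// Illustrative, standalone sketch of the "close to halfway?" test above, using u128 in
// place of the double-width product that `Fp::mul` computes. The significands and the
// slop bound passed in are hypothetical demo values, not taken from the power table.
fn near_halfway_sketch(f_sig: u64, pow10_sig: u64, slop: i64) -> bool {
    const PREC: u32 = 64;     // working precision, `P` above
    const SIG_BITS: u32 = 53; // f64 significand bits
    // Top 64 bits of the 128-bit product: the approximation z of f * 10^e.
    let z = ((f_sig as u128 * pow10_sig as u128) >> 64) as u64;
    // Bits that get discarded when narrowing from PREC to SIG_BITS bits.
    let exp_p_n: u64 = 1 << (PREC - SIG_BITS);
    let lowbits = (z % exp_p_n) as i64;
    // If the discarded bits land within `slop` of exactly 0.5 ULP, the rounding decision
    // is in doubt and the caller falls back to Algorithm R.
    (lowbits - exp_p_n as i64 / 2).abs() <= slop
}
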
/// An iterative algorithm that improves a floating point approximation of `f * 10^e`.
///
/// Each iteration gets one unit in the last place closer, which of course takes terribly long to
/// converge if `z0` is even mildly off. Luckily, when used as fallback for Bellerophon, the
/// starting approximation is off by at most one ULP.
fn algorithm_r<T: RawFloat>(f: &Big, e: i16, z0: T) -> T {
    let mut z = z0;
    loop {
        let raw = z.unpack();
        let (m, k) = (raw.sig, raw.k);
        let mut x = f.clone();
        let mut y = Big::from_u64(m);

        // Find positive integers `x`, `y` such that `x / y` is exactly `(f * 10^e) / (m * 2^k)`.
        // This not only avoids dealing with the signs of `e` and `k`, it also eliminates the
        // power of two common to `10^e` and `2^k`, making the numbers smaller.
        make_ratio(&mut x, &mut y, e, k);

        let m_digits = [(m & 0xFF_FF_FF_FF) as u32, (m >> 32) as u32];
        // This is written a bit awkwardly because our bignums don't support
        // negative numbers, so we use the absolute value + sign information.
        // The multiplication with m_digits can't overflow. If `x` or `y` are large enough that
        // we need to worry about overflow, then they are also large enough that `make_ratio` has
        // reduced the fraction by a factor of 2^64 or more.
        let (d2, d_negative) = if x >= y {
            // Don't need x any more, save a clone().
            x.sub(&y).mul_pow2(1).mul_digits(&m_digits);
            (x, false)
        } else {
            // Still need y - make a copy.
            let mut y = y.clone();
            y.sub(&x).mul_pow2(1).mul_digits(&m_digits);
            (y, true)
        };

        if d2 < y {
            let mut d2_double = d2;
            d2_double.mul_pow2(1);
            if m == T::min_sig() && d_negative && d2_double > y {
                z = prev_float(z);
            } else {
                return z;
            }
        } else if d2 == y {
            if m % 2 == 0 {
                if m == T::min_sig() && d_negative {
                    z = prev_float(z);
                } else {
                    return z;
                }
            } else if d_negative {
                z = prev_float(z);
            } else {
                z = next_float(z);
            }
        } else if d_negative {
            z = prev_float(z);
        } else {
            z = next_float(z);
        }
    }
}

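// Illustrative, standalone sketch of the one-ULP steps used above: for positive, finite,
// non-zero f64 values, the next and previous floats are adjacent bit patterns. This
// mirrors the role that `next_float` / `prev_float` play for the generic RawFloat types.
fn next_float_sketch(z: f64) -> f64 {
    f64::from_bits(z.to_bits() + 1)
}
fn prev_float_sketch(z: f64) -> f64 {
    f64::from_bits(z.to_bits() - 1)
}
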
/// Given `x = f` and `y = m` where `f` represents the input decimal digits as usual and `m` is
/// the significand of a floating point approximation, make the ratio `x / y` equal to
/// `(f * 10^e) / (m * 2^k)`, possibly reduced by a power of two that both have in common.
fn make_ratio(x: &mut Big, y: &mut Big, e: i16, k: i16) {
    let (e_abs, k_abs) = (e.abs() as usize, k.abs() as usize);
    if e >= 0 {
        if k >= 0 {
            // x = f * 10^e, y = m * 2^k, except that we reduce the fraction by some power of two.
            let common = min(e_abs, k_abs);
            x.mul_pow5(e_abs).mul_pow2(e_abs - common);
            y.mul_pow2(k_abs - common);
        } else {
            // x = f * 10^e * 2^abs(k), y = m
            // This can't overflow because it requires positive `e` and negative `k`, which can
            // only happen for values extremely close to 1, which means that `e` and `k` will be
            // comparatively tiny.
            x.mul_pow5(e_abs).mul_pow2(e_abs + k_abs);
        }
    } else {
        if k >= 0 {
            // x = f, y = m * 10^abs(e) * 2^k
            // This can't overflow either, see above.
            y.mul_pow5(e_abs).mul_pow2(k_abs + e_abs);
        } else {
            // x = f * 2^abs(k), y = m * 10^abs(e), again reducing by a common power of two.
            let common = min(e_abs, k_abs);
            x.mul_pow2(k_abs - common);
            y.mul_pow5(e_abs).mul_pow2(e_abs - common);
        }
    }
}

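// Illustrative, standalone check of the ratio construction above for the `e >= 0, k >= 0`
// branch, with u128 standing in for the bignum type. The values of f, e, m and k are
// arbitrary demo inputs, small enough that nothing overflows.
fn make_ratio_sketch() -> (u128, u128) {
    let (f, e): (u128, u32) = (12345, 3); // decimal input, f * 10^e
    let (m, k): (u128, u32) = (987, 5);   // float approximation, m * 2^k
    // Reduce (f * 10^e) / (m * 2^k) by the common factor 2^min(e, k).
    let common = e.min(k);
    let x = f * 5u128.pow(e) * (1u128 << (e - common));
    let y = m * (1u128 << (k - common));
    // Cross-multiplication confirms the reduced ratio equals the original one.
    assert_eq!(x * (m * (1u128 << k)), y * (f * 10u128.pow(e)));
    (x, y)
}
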
/// Conceptually, Algorithm M is the simplest way to convert a decimal to a float.
///
/// We form a ratio that is equal to `f * 10^e`, then throw in powers of two until it yields
/// a valid float significand. The binary exponent `k` is the number of times we multiplied
/// numerator or denominator by two, i.e., at all times `f * 10^e` equals `(u / v) * 2^k`.
/// Once we have found our significand, we only need to round by inspecting the remainder of the
/// division, which is done in helper functions further below.
///
/// This algorithm is super slow, even with the optimization described in `quick_start()`.
/// However, it's the simplest of the algorithms to adapt for overflow, underflow, and subnormal
/// results. This implementation takes over when Bellerophon and Algorithm R are overwhelmed.
/// Detecting underflow and overflow is easy: the ratio still isn't an in-range significand,
/// yet the minimum/maximum exponent has been reached. In the case of overflow, we simply return
/// infinity.
///
/// Handling underflow and subnormals is trickier. One big problem is that, with the minimum
/// exponent, the ratio might still be too large for a significand. See underflow() for details.
pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
    let mut u;
    let mut v;
    let e_abs = e.abs() as usize;
    let mut k = 0;
    if e < 0 {
        u = f.clone();
        v = Big::from_small(1);
        v.mul_pow5(e_abs).mul_pow2(e_abs);
    } else {
        // FIXME possible optimization: generalize big_to_fp so that we can do the equivalent of
        // fp_to_float(big_to_fp(u)) here, only without the double rounding.
        u = f.clone();
        u.mul_pow5(e_abs).mul_pow2(e_abs);
        v = Big::from_small(1);
    }
    quick_start::<T>(&mut u, &mut v, &mut k);
    let mut rem = Big::from_small(0);
    let mut x = Big::from_small(0);
    let min_sig = Big::from_u64(T::min_sig());
    let max_sig = Big::from_u64(T::max_sig());
    loop {
        u.div_rem(&v, &mut x, &mut rem);
        if k == T::min_exp_int() {
            // We have to stop at the minimum exponent; if we wait until `k < T::min_exp_int()`,
            // then we'd be off by a factor of two. Unfortunately this means we have to special-
            // case normal numbers with the minimum exponent.
            // FIXME find a more elegant formulation, but run the `tiny-pow10` test to make sure
            // that it's actually correct!
            if x >= min_sig && x <= max_sig {
                break;
            }
            return underflow(x, v, rem);
        }
        if k > T::max_exp_int() {
            return T::infinity();
        }
        if x < min_sig {
            u.mul_pow2(1);
            k -= 1;
        } else if x > max_sig {
            v.mul_pow2(1);
            k += 1;
        } else {
            break;
        }
    }
    let q = num::to_u64(&x);
    let z = rawfp::encode_normal(Unpacked::new(q, k));
    round_by_remainder(v, rem, q, z)
}

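// Illustrative, standalone sketch of the Algorithm M loop above, specialized to f64 and
// to inputs where no overflow, underflow, or subnormal handling is needed, with u128
// standing in for the bignum type. All names here are local to the sketch.
fn algorithm_m_sketch(f: u64, e: i32) -> f64 {
    if f == 0 {
        return 0.0;
    }
    // u / v == f * 10^e; assumes 10^|e| and the scaled numerator fit in u128.
    let (mut u, mut v): (u128, u128) = if e < 0 {
        (f as u128, 10u128.pow(-e as u32))
    } else {
        (f as u128 * 10u128.pow(e as u32), 1)
    };
    let (min_sig, max_sig) = (1u128 << 52, (1u128 << 53) - 1);
    let mut k: i32 = 0;
    // Throw in powers of two until u / v is a valid 53-bit significand; k records them.
    loop {
        let x = u / v;
        if x < min_sig {
            u <<= 1;
            k -= 1;
        } else if x > max_sig {
            v <<= 1;
            k += 1;
        } else {
            break;
        }
    }
    let (q, rem) = (u / v, u % v);
    // Round to nearest, ties to even, by comparing the remainder against v - rem.
    let q = if rem > v - rem || (rem == v - rem && q % 2 == 1) { q + 1 } else { q };
    // Assemble q * 2^k; each doubling/halving below is exact for in-range results.
    let mut z = q as f64;
    if k >= 0 {
        for _ in 0..k { z *= 2.0; }
    } else {
        for _ in 0..-k { z /= 2.0; }
    }
    z
}
// e.g. algorithm_m_sketch(1, -1) yields the f64 nearest to 0.1.
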
/// Skip over most Algorithm M iterations by checking the bit length.
fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
    // The bit length is an estimate of the base two logarithm, and log(u / v) = log(u) - log(v).
    // The estimate is off by at most 1, but always an under-estimate, so the errors on log(u)
    // and log(v) have the same sign and cancel out (if both are large). Therefore the error
    // for log(u / v) is at most one as well.
    // The target ratio is one where u/v is an in-range significand. Thus our termination
    // condition is log2(u / v) being the number of significand bits, plus/minus one.
    // FIXME Looking at the second bit could improve the estimate and avoid some more divisions.
    let target_ratio = f64::sig_bits() as i16;
    let log2_u = u.bit_length() as i16;
    let log2_v = v.bit_length() as i16;
    let mut u_shift: i16 = 0;
    let mut v_shift: i16 = 0;
    assert!(*k == 0);
    loop {
        if *k == T::min_exp_int() {
            // Underflow or subnormal. Leave it to the main function.
            break;
        }
        if *k == T::max_exp_int() {
            // Overflow. Leave it to the main function.
            break;
        }
        let log2_ratio = (log2_u + u_shift) - (log2_v + v_shift);
        if log2_ratio < target_ratio - 1 {
            u_shift += 1;
            *k -= 1;
        } else if log2_ratio > target_ratio + 1 {
            v_shift += 1;
            *k += 1;
        } else {
            break;
        }
    }
    u.mul_pow2(u_shift as usize);
    v.mul_pow2(v_shift as usize);
}

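// Illustrative, standalone sketch of the bit-length shortcut above: the bit length of an
// integer is floor(log2) + 1, so a difference of bit lengths estimates log2(u / v) to
// within one, which says roughly how many doublings are still needed. The u64 demo
// inputs and the fixed 53-bit target are assumptions local to this sketch.
fn quick_start_sketch(u: u64, v: u64) -> i32 {
    let bit_len = |n: u64| 64 - n.leading_zeros() as i32;
    let log2_ratio = bit_len(u) - bit_len(v);
    // Positive result: shift u up about this many times; negative: shift v up instead.
    53 - log2_ratio
}
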
fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
    if x < Big::from_u64(T::min_sig()) {
        let q = num::to_u64(&x);
        let z = rawfp::encode_subnormal(q);
        return round_by_remainder(v, rem, q, z);
    }
    // Ratio isn't an in-range significand with the minimum exponent, so we need to round off
    // excess bits and adjust the exponent accordingly. The real value now looks like this:
    //
    //        x       lsb
    // /--------------\/
    // 1010101010101010.10101010101010 * 2^k
    // \-----/\-------/ \------------/
    //    q     trunc.  (represented by rem)
    //
    // Therefore, when the rounded-off bits are != 0.5 ULP, they decide the rounding
    // on their own. When they are equal and the remainder is non-zero, the value still
    // needs to be rounded up. Only when the rounded-off bits are exactly 1/2 ULP and the
    // remainder is zero do we have a half-to-even situation.
    let bits = x.bit_length();
    let lsb = bits - T::sig_bits() as usize;
    let q = num::get_bits(&x, lsb, bits);
    let k = T::min_exp_int() + lsb as i16;
    let z = rawfp::encode_normal(Unpacked::new(q, k));
    let q_even = q % 2 == 0;
    match num::compare_with_half_ulp(&x, lsb) {
        Greater => next_float(z),
        Less => z,
        Equal if rem.is_zero() && q_even => z,
        Equal => next_float(z),
    }
}

/// Ordinary round-to-even, obfuscated by having to round based on the remainder of a division.
fn round_by_remainder<T: RawFloat>(v: Big, r: Big, q: u64, z: T) -> T {
    let mut v_minus_r = v;
    v_minus_r.sub(&r);
    if r < v_minus_r {
        z
    } else if r > v_minus_r {
        next_float(z)
    } else if q % 2 == 0 {
        z
    } else {
        next_float(z)
    }
}
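
// Illustrative, standalone counterpart of `round_by_remainder` for plain integers: given
// q = u / v and r = u % v, decide whether q or q + 1 is closer to the true quotient,
// breaking ties toward the even candidate. The u64 parameters are demo stand-ins for the
// bignum arguments above.
fn round_by_remainder_sketch(v: u64, r: u64, q: u64) -> u64 {
    let v_minus_r = v - r;
    if r < v_minus_r {
        q           // fractional part below 1/2: keep q
    } else if r > v_minus_r {
        q + 1       // fractional part above 1/2: round up
    } else if q % 2 == 0 {
        q           // exactly 1/2 and q even: ties-to-even keeps q
    } else {
        q + 1       // exactly 1/2 and q odd: bump to the even neighbor
    }
}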
