Skip to content

Commit c337164

Browse files
committed
Add a benchmark for UniformInt sample_single
1 parent 1d54f11 commit c337164

File tree

1 file changed

+205
-6
lines changed

1 file changed

+205
-6
lines changed

benches/distributions.rs

Lines changed: 205 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@ extern crate test;
1717
const RAND_BENCH_N: u64 = 1000;
1818

1919
use rand::distributions::{Alphanumeric, Open01, OpenClosed01, Standard, Uniform};
20+
use rand::distributions::uniform::{UniformInt, UniformSampler};
2021
use std::mem::size_of;
2122
use std::num::{NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8};
2223
use std::time::Duration;
23-
use test::Bencher;
24+
use test::{Bencher, black_box};
2425

2526
use rand::prelude::*;
2627

@@ -207,11 +208,22 @@ macro_rules! gen_range_int {
207208
};
208209
}
209210

210-
gen_range_int!(gen_range_i8, i8, -20i8, 100);
211-
gen_range_int!(gen_range_i16, i16, -500i16, 2000);
212-
gen_range_int!(gen_range_i32, i32, -200_000_000i32, 800_000_000);
213-
gen_range_int!(gen_range_i64, i64, 3i64, 123_456_789_123);
214-
gen_range_int!(gen_range_i128, i128, -12345678901234i128, 123_456_789_123_456_789);
211+
// Algorithms such as Fisher–Yates shuffle often require uniform values from an
212+
// incrementing range 0..n. We use -1..n here to prevent wrapping in the test
213+
// from generating a 0-sized range.
214+
gen_range_int!(gen_range_i8_low, i8, -1i8, 0);
215+
gen_range_int!(gen_range_i16_low, i16, -1i16, 0);
216+
gen_range_int!(gen_range_i32_low, i32, -1i32, 0);
217+
gen_range_int!(gen_range_i64_low, i64, -1i64, 0);
218+
gen_range_int!(gen_range_i128_low, i128, -1i128, 0);
219+
220+
// These were the initially tested ranges. They are likely to see fewer
221+
// rejections than the low tests.
222+
gen_range_int!(gen_range_i8_high, i8, -20i8, 100);
223+
gen_range_int!(gen_range_i16_high, i16, -500i16, 2000);
224+
gen_range_int!(gen_range_i32_high, i32, -200_000_000i32, 800_000_000);
225+
gen_range_int!(gen_range_i64_high, i64, 3i64, 123_456_789_123);
226+
gen_range_int!(gen_range_i128_high, i128, -12345678901234i128, 123_456_789_123_456_789);
215227

216228
// construct and sample from a floating-point range
217229
macro_rules! gen_range_float {
@@ -239,3 +251,190 @@ macro_rules! gen_range_float {
239251

240252
gen_range_float!(gen_range_f32, f32, -20000.0f32, 100000.0);
241253
gen_range_float!(gen_range_f64, f64, 123.456f64, 7890.12);
254+
255+
256+
// In src/distributions/uniform.rs, we say:
257+
// Implementation of [`sample_single`] is optional, and is only useful when
258+
// the implementation can be faster than `Self::new(low, high).sample(rng)`.
259+
260+
// `UniformSampler::sample_single` compromises on the rejection range to be
261+
// faster. This benchmark demonstrates both the speed gain of doing this, and
262+
// the worst case behavior.
263+
264+
/// Sample random values from a pre-existing distribution. This uses the
265+
/// half open `new` to be equivalent to the behavior of `sample_single`.
///
/// Expands to a `#[bench]` function named `$fnn`. On every `b.iter`
/// iteration a `UniformInt::<$type>` sampler is constructed 10 times from
/// `$low` and `$high`, and `$count` values are drawn from each construction,
/// so the construction cost is amortised over `$count` samples.
266+
macro_rules! uniform_sample {
267+
($fnn:ident, $type:ident, $low:expr, $high:expr, $count:expr) => {
268+
#[bench]
269+
fn $fnn(b: &mut Bencher) {
270+
// One generator per benchmark function, seeded from entropy.
let mut rng = Pcg64Mcg::from_entropy();
271+
// The bounds go through `black_box` so that they are opaque to the
// optimizer rather than visible as literal constants.
let low = black_box($low);
272+
let high = black_box($high);
273+
b.iter(|| {
274+
for _ in 0..10 {
275+
// Sampler construction is deliberately inside the timed loop.
let dist = UniformInt::<$type>::new(low, high);
276+
for _ in 0..$count {
277+
// `black_box` keeps each sampled value from being discarded.
black_box(dist.sample(&mut rng));
278+
}
279+
}
280+
});
281+
}
282+
};
283+
}
284+
285+
/// Sample random values from a pre-existing distribution built with
/// `new_inclusive` instead of `new`.
///
/// Expands to a `#[bench]` function named `$fnn` with the same loop shape as
/// `uniform_sample`: on every `b.iter` iteration the sampler is constructed
/// 10 times and `$count` values are drawn from each construction.
macro_rules! uniform_inclusive {
286+
($fnn:ident, $type:ident, $low:expr, $high:expr, $count:expr) => {
287+
#[bench]
288+
fn $fnn(b: &mut Bencher) {
289+
// One generator per benchmark function, seeded from entropy.
let mut rng = Pcg64Mcg::from_entropy();
290+
// The bounds go through `black_box` so that they are opaque to the
// optimizer rather than visible as literal constants.
let low = black_box($low);
291+
let high = black_box($high);
292+
b.iter(|| {
293+
for _ in 0..10 {
294+
// Sampler construction is deliberately inside the timed loop.
let dist = UniformInt::<$type>::new_inclusive(low, high);
295+
for _ in 0..$count {
296+
// `black_box` keeps each sampled value from being discarded.
black_box(dist.sample(&mut rng));
297+
}
298+
}
299+
});
300+
}
301+
};
302+
}
303+
304+
/// Use `sample_single` to create a one-off random value
///
/// Expands to a `#[bench]` function named `$fnn`. No sampler object is
/// constructed; every value comes from a direct call to
/// `UniformInt::<$type>::sample_single(low, high, &mut rng)`. Each `b.iter`
/// iteration draws `10 * $count` values, the same total as the
/// `uniform_sample` and `uniform_inclusive` benchmarks.
305+
macro_rules! uniform_single {
306+
($fnn:ident, $type:ident, $low:expr, $high:expr, $count:expr) => {
307+
#[bench]
308+
fn $fnn(b: &mut Bencher) {
309+
// One generator per benchmark function, seeded from entropy.
let mut rng = Pcg64Mcg::from_entropy();
310+
// The bounds go through `black_box` so that they are opaque to the
// optimizer rather than visible as literal constants.
let low = black_box($low);
311+
let high = black_box($high);
312+
b.iter(|| {
313+
// A single flat loop: there is no per-sampler setup to repeat.
for _ in 0..(10 * $count) {
314+
black_box(UniformInt::<$type>::sample_single(low, high, &mut rng));
315+
}
316+
});
317+
}
318+
};
319+
}
320+
321+
322+
// Benchmark:
323+
// n: can use the full generated range
324+
// (n-1): only the max value is rejected: expect this to be fast
325+
// n/2+1: almost half of the values are rejected, and we can do no better
326+
// n/2: approximation rejects half the values but powers of 2 could have no rejection
327+
// n/2-1: only a few values are rejected: expect this to be fast
328+
// 6: approximation rejects 25% of values but could be faster. However modulo by
329+
// low numbers is typically more expensive
330+
331+
// With the use of u32 as the minimum generated width, the worst-case u16 range
332+
// (32769) will only reject 32769 / 4294967296 samples.
333+
const HALF_16_BIT_UNSIGNED: u16 = 1 << 15;
334+
335+
uniform_sample!(uniform_u16x1_allm1_new, u16, 0, u16::max_value(), 1);
336+
uniform_sample!(uniform_u16x1_halfp1_new, u16, 0, HALF_16_BIT_UNSIGNED + 1, 1);
337+
uniform_sample!(uniform_u16x1_half_new, u16, 0, HALF_16_BIT_UNSIGNED, 1);
338+
uniform_sample!(uniform_u16x1_halfm1_new, u16, 0, HALF_16_BIT_UNSIGNED - 1, 1);
339+
uniform_sample!(uniform_u16x1_6_new, u16, 0, 6u16, 1);
340+
341+
uniform_single!(uniform_u16x1_allm1_single, u16, 0, u16::max_value(), 1);
342+
uniform_single!(uniform_u16x1_halfp1_single, u16, 0, HALF_16_BIT_UNSIGNED + 1, 1);
343+
uniform_single!(uniform_u16x1_half_single, u16, 0, HALF_16_BIT_UNSIGNED, 1);
344+
uniform_single!(uniform_u16x1_halfm1_single, u16, 0, HALF_16_BIT_UNSIGNED - 1, 1);
345+
uniform_single!(uniform_u16x1_6_single, u16, 0, 6u16, 1);
346+
347+
uniform_inclusive!(uniform_u16x10_all_new_inclusive, u16, 0, u16::max_value(), 10);
348+
uniform_sample!(uniform_u16x10_allm1_new, u16, 0, u16::max_value(), 10);
349+
uniform_sample!(uniform_u16x10_halfp1_new, u16, 0, HALF_16_BIT_UNSIGNED + 1, 10);
350+
uniform_sample!(uniform_u16x10_half_new, u16, 0, HALF_16_BIT_UNSIGNED, 10);
351+
uniform_sample!(uniform_u16x10_halfm1_new, u16, 0, HALF_16_BIT_UNSIGNED - 1, 10);
352+
uniform_sample!(uniform_u16x10_6_new, u16, 0, 6u16, 10);
353+
354+
uniform_single!(uniform_u16x10_allm1_single, u16, 0, u16::max_value(), 10);
355+
uniform_single!(uniform_u16x10_halfp1_single, u16, 0, HALF_16_BIT_UNSIGNED + 1, 10);
356+
uniform_single!(uniform_u16x10_half_single, u16, 0, HALF_16_BIT_UNSIGNED, 10);
357+
uniform_single!(uniform_u16x10_halfm1_single, u16, 0, HALF_16_BIT_UNSIGNED - 1, 10);
358+
uniform_single!(uniform_u16x10_6_single, u16, 0, 6u16, 10);
359+
360+
361+
const HALF_32_BIT_UNSIGNED: u32 = 1 << 31;
362+
363+
uniform_sample!(uniform_u32x1_allm1_new, u32, 0, u32::max_value(), 1);
364+
uniform_sample!(uniform_u32x1_halfp1_new, u32, 0, HALF_32_BIT_UNSIGNED + 1, 1);
365+
uniform_sample!(uniform_u32x1_half_new, u32, 0, HALF_32_BIT_UNSIGNED, 1);
366+
uniform_sample!(uniform_u32x1_halfm1_new, u32, 0, HALF_32_BIT_UNSIGNED - 1, 1);
367+
uniform_sample!(uniform_u32x1_6_new, u32, 0, 6u32, 1);
368+
369+
uniform_single!(uniform_u32x1_allm1_single, u32, 0, u32::max_value(), 1);
370+
uniform_single!(uniform_u32x1_halfp1_single, u32, 0, HALF_32_BIT_UNSIGNED + 1, 1);
371+
uniform_single!(uniform_u32x1_half_single, u32, 0, HALF_32_BIT_UNSIGNED, 1);
372+
uniform_single!(uniform_u32x1_halfm1_single, u32, 0, HALF_32_BIT_UNSIGNED - 1, 1);
373+
uniform_single!(uniform_u32x1_6_single, u32, 0, 6u32, 1);
374+
375+
uniform_inclusive!(uniform_u32x10_all_new_inclusive, u32, 0, u32::max_value(), 10);
376+
uniform_sample!(uniform_u32x10_allm1_new, u32, 0, u32::max_value(), 10);
377+
uniform_sample!(uniform_u32x10_halfp1_new, u32, 0, HALF_32_BIT_UNSIGNED + 1, 10);
378+
uniform_sample!(uniform_u32x10_half_new, u32, 0, HALF_32_BIT_UNSIGNED, 10);
379+
uniform_sample!(uniform_u32x10_halfm1_new, u32, 0, HALF_32_BIT_UNSIGNED - 1, 10);
380+
uniform_sample!(uniform_u32x10_6_new, u32, 0, 6u32, 10);
381+
382+
uniform_single!(uniform_u32x10_allm1_single, u32, 0, u32::max_value(), 10);
383+
uniform_single!(uniform_u32x10_halfp1_single, u32, 0, HALF_32_BIT_UNSIGNED + 1, 10);
384+
uniform_single!(uniform_u32x10_half_single, u32, 0, HALF_32_BIT_UNSIGNED, 10);
385+
uniform_single!(uniform_u32x10_halfm1_single, u32, 0, HALF_32_BIT_UNSIGNED - 1, 10);
386+
uniform_single!(uniform_u32x10_6_single, u32, 0, 6u32, 10);
387+
388+
const HALF_64_BIT_UNSIGNED: u64 = 1 << 63;
389+
390+
uniform_sample!(uniform_u64x1_allm1_new, u64, 0, u64::max_value(), 1);
391+
uniform_sample!(uniform_u64x1_halfp1_new, u64, 0, HALF_64_BIT_UNSIGNED + 1, 1);
392+
uniform_sample!(uniform_u64x1_half_new, u64, 0, HALF_64_BIT_UNSIGNED, 1);
393+
uniform_sample!(uniform_u64x1_halfm1_new, u64, 0, HALF_64_BIT_UNSIGNED - 1, 1);
394+
uniform_sample!(uniform_u64x1_6_new, u64, 0, 6u64, 1);
395+
396+
uniform_single!(uniform_u64x1_allm1_single, u64, 0, u64::max_value(), 1);
397+
uniform_single!(uniform_u64x1_halfp1_single, u64, 0, HALF_64_BIT_UNSIGNED + 1, 1);
398+
uniform_single!(uniform_u64x1_half_single, u64, 0, HALF_64_BIT_UNSIGNED, 1);
399+
uniform_single!(uniform_u64x1_halfm1_single, u64, 0, HALF_64_BIT_UNSIGNED - 1, 1);
400+
uniform_single!(uniform_u64x1_6_single, u64, 0, 6u64, 1);
401+
402+
uniform_inclusive!(uniform_u64x10_all_new_inclusive, u64, 0, u64::max_value(), 10);
403+
uniform_sample!(uniform_u64x10_allm1_new, u64, 0, u64::max_value(), 10);
404+
uniform_sample!(uniform_u64x10_halfp1_new, u64, 0, HALF_64_BIT_UNSIGNED + 1, 10);
405+
uniform_sample!(uniform_u64x10_half_new, u64, 0, HALF_64_BIT_UNSIGNED, 10);
406+
uniform_sample!(uniform_u64x10_halfm1_new, u64, 0, HALF_64_BIT_UNSIGNED - 1, 10);
407+
uniform_sample!(uniform_u64x10_6_new, u64, 0, 6u64, 10);
408+
409+
uniform_single!(uniform_u64x10_allm1_single, u64, 0, u64::max_value(), 10);
410+
uniform_single!(uniform_u64x10_halfp1_single, u64, 0, HALF_64_BIT_UNSIGNED + 1, 10);
411+
uniform_single!(uniform_u64x10_half_single, u64, 0, HALF_64_BIT_UNSIGNED, 10);
412+
uniform_single!(uniform_u64x10_halfm1_single, u64, 0, HALF_64_BIT_UNSIGNED - 1, 10);
413+
uniform_single!(uniform_u64x10_6_single, u64, 0, 6u64, 10);
414+
415+
const HALF_128_BIT_UNSIGNED: u128 = 1 << 127;
416+
417+
uniform_sample!(uniform_u128x1_allm1_new, u128, 0, u128::max_value(), 1);
418+
uniform_sample!(uniform_u128x1_halfp1_new, u128, 0, HALF_128_BIT_UNSIGNED + 1, 1);
419+
uniform_sample!(uniform_u128x1_half_new, u128, 0, HALF_128_BIT_UNSIGNED, 1);
420+
uniform_sample!(uniform_u128x1_halfm1_new, u128, 0, HALF_128_BIT_UNSIGNED - 1, 1);
421+
uniform_sample!(uniform_u128x1_6_new, u128, 0, 6u128, 1);
422+
423+
uniform_single!(uniform_u128x1_allm1_single, u128, 0, u128::max_value(), 1);
424+
uniform_single!(uniform_u128x1_halfp1_single, u128, 0, HALF_128_BIT_UNSIGNED + 1, 1);
425+
uniform_single!(uniform_u128x1_half_single, u128, 0, HALF_128_BIT_UNSIGNED, 1);
426+
uniform_single!(uniform_u128x1_halfm1_single, u128, 0, HALF_128_BIT_UNSIGNED - 1, 1);
427+
uniform_single!(uniform_u128x1_6_single, u128, 0, 6u128, 1);
428+
429+
uniform_inclusive!(uniform_u128x10_all_new_inclusive, u128, 0, u128::max_value(), 10);
430+
uniform_sample!(uniform_u128x10_allm1_new, u128, 0, u128::max_value(), 10);
431+
uniform_sample!(uniform_u128x10_halfp1_new, u128, 0, HALF_128_BIT_UNSIGNED + 1, 10);
432+
uniform_sample!(uniform_u128x10_half_new, u128, 0, HALF_128_BIT_UNSIGNED, 10);
433+
uniform_sample!(uniform_u128x10_halfm1_new, u128, 0, HALF_128_BIT_UNSIGNED - 1, 10);
434+
uniform_sample!(uniform_u128x10_6_new, u128, 0, 6u128, 10);
435+
436+
uniform_single!(uniform_u128x10_allm1_single, u128, 0, u128::max_value(), 10);
437+
uniform_single!(uniform_u128x10_halfp1_single, u128, 0, HALF_128_BIT_UNSIGNED + 1, 10);
438+
uniform_single!(uniform_u128x10_half_single, u128, 0, HALF_128_BIT_UNSIGNED, 10);
439+
uniform_single!(uniform_u128x10_halfm1_single, u128, 0, HALF_128_BIT_UNSIGNED - 1, 10);
440+
uniform_single!(uniform_u128x10_6_single, u128, 0, 6u128, 10);

0 commit comments

Comments
 (0)