@@ -17,10 +17,11 @@ extern crate test;
 const RAND_BENCH_N: u64 = 1000;
 
 use rand::distributions::{Alphanumeric, Open01, OpenClosed01, Standard, Uniform};
+use rand::distributions::uniform::{UniformInt, UniformSampler};
 use std::mem::size_of;
 use std::num::{NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8};
 use std::time::Duration;
-use test::Bencher;
+use test::{Bencher, black_box};
 
 use rand::prelude::*;
 
@@ -207,11 +208,22 @@ macro_rules! gen_range_int {
     };
 }
 
-gen_range_int!(gen_range_i8, i8, -20i8, 100);
-gen_range_int!(gen_range_i16, i16, -500i16, 2000);
-gen_range_int!(gen_range_i32, i32, -200_000_000i32, 800_000_000);
-gen_range_int!(gen_range_i64, i64, 3i64, 123_456_789_123);
-gen_range_int!(gen_range_i128, i128, -12345678901234i128, 123_456_789_123_456_789);
+// Algorithms such as Fisher–Yates shuffle often require uniform values from an
+// incrementing range 0..n. We use -1..n here to prevent wrapping in the test
+// from generating a 0-sized range.
+gen_range_int!(gen_range_i8_low, i8, -1i8, 0);
+gen_range_int!(gen_range_i16_low, i16, -1i16, 0);
+gen_range_int!(gen_range_i32_low, i32, -1i32, 0);
+gen_range_int!(gen_range_i64_low, i64, -1i64, 0);
+gen_range_int!(gen_range_i128_low, i128, -1i128, 0);
+
+// These were the initially tested ranges. They are likely to see fewer
+// rejections than the low tests.
+gen_range_int!(gen_range_i8_high, i8, -20i8, 100);
+gen_range_int!(gen_range_i16_high, i16, -500i16, 2000);
+gen_range_int!(gen_range_i32_high, i32, -200_000_000i32, 800_000_000);
+gen_range_int!(gen_range_i64_high, i64, 3i64, 123_456_789_123);
+gen_range_int!(gen_range_i128_high, i128, -12345678901234i128, 123_456_789_123_456_789);
 
 // construct and sample from a floating-point range
 macro_rules! gen_range_float {
@@ -239,3 +251,190 @@ macro_rules! gen_range_float {
 
 gen_range_float!(gen_range_f32, f32, -20000.0f32, 100000.0);
 gen_range_float!(gen_range_f64, f64, 123.456f64, 7890.12);
+
+
+// In src/distributions/uniform.rs, we say:
+// Implementation of [`sample_single`] is optional, and is only useful when
+// the implementation can be faster than `Self::new(low, high).sample(rng)`.
+
+// `UniformSampler::sample_single` compromises on the rejection range to be
+// faster. This benchmark demonstrates both the speed gain of doing this, and
+// the worst-case behavior.
+
+/// Sample random values from a pre-existing distribution. This uses the
+/// half open `new` to be equivalent to the behavior of `uniform_single`.
+macro_rules! uniform_sample {
+    ($fnn:ident, $type:ident, $low:expr, $high:expr, $count:expr) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng = Pcg64Mcg::from_entropy();
+            let low = black_box($low);
+            let high = black_box($high);
+            b.iter(|| {
+                for _ in 0..10 {
+                    let dist = UniformInt::<$type>::new(low, high);
+                    for _ in 0..$count {
+                        black_box(dist.sample(&mut rng));
+                    }
+                }
+            });
+        }
+    };
+}
+
+macro_rules! uniform_inclusive {
+    ($fnn:ident, $type:ident, $low:expr, $high:expr, $count:expr) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng = Pcg64Mcg::from_entropy();
+            let low = black_box($low);
+            let high = black_box($high);
+            b.iter(|| {
+                for _ in 0..10 {
+                    let dist = UniformInt::<$type>::new_inclusive(low, high);
+                    for _ in 0..$count {
+                        black_box(dist.sample(&mut rng));
+                    }
+                }
+            });
+        }
+    };
+}
+
+/// Use `sample_single` to create a one-off random value
+macro_rules! uniform_single {
+    ($fnn:ident, $type:ident, $low:expr, $high:expr, $count:expr) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng = Pcg64Mcg::from_entropy();
+            let low = black_box($low);
+            let high = black_box($high);
+            b.iter(|| {
+                for _ in 0..(10 * $count) {
+                    black_box(UniformInt::<$type>::sample_single(low, high, &mut rng));
+                }
+            });
+        }
+    };
+}
+
+
+// Benchmark:
+// n: can use the full generated range
+// (n-1): only the max value is rejected: expect this to be fast
+// n/2+1: almost half of the values are rejected, and we can do no better
+// n/2: approximation rejects half the values but powers of 2 could have no rejection
+// n/2-1: only a few values are rejected: expect this to be fast
+// 6: approximation rejects 25% of values but could be faster. However modulo by
+//    low numbers is typically more expensive
+
+// With the use of u32 as the minimum generated width, the worst-case u16 range
+// (32769) will only reject 32769 / 4294967296 samples.
+const HALF_16_BIT_UNSIGNED: u16 = 1 << 15;
+
+uniform_sample!(uniform_u16x1_allm1_new, u16, 0, u16::max_value(), 1);
+uniform_sample!(uniform_u16x1_halfp1_new, u16, 0, HALF_16_BIT_UNSIGNED + 1, 1);
+uniform_sample!(uniform_u16x1_half_new, u16, 0, HALF_16_BIT_UNSIGNED, 1);
+uniform_sample!(uniform_u16x1_halfm1_new, u16, 0, HALF_16_BIT_UNSIGNED - 1, 1);
+uniform_sample!(uniform_u16x1_6_new, u16, 0, 6u16, 1);
+
+uniform_single!(uniform_u16x1_allm1_single, u16, 0, u16::max_value(), 1);
+uniform_single!(uniform_u16x1_halfp1_single, u16, 0, HALF_16_BIT_UNSIGNED + 1, 1);
+uniform_single!(uniform_u16x1_half_single, u16, 0, HALF_16_BIT_UNSIGNED, 1);
+uniform_single!(uniform_u16x1_halfm1_single, u16, 0, HALF_16_BIT_UNSIGNED - 1, 1);
+uniform_single!(uniform_u16x1_6_single, u16, 0, 6u16, 1);
+
+uniform_inclusive!(uniform_u16x10_all_new_inclusive, u16, 0, u16::max_value(), 10);
+uniform_sample!(uniform_u16x10_allm1_new, u16, 0, u16::max_value(), 10);
+uniform_sample!(uniform_u16x10_halfp1_new, u16, 0, HALF_16_BIT_UNSIGNED + 1, 10);
+uniform_sample!(uniform_u16x10_half_new, u16, 0, HALF_16_BIT_UNSIGNED, 10);
+uniform_sample!(uniform_u16x10_halfm1_new, u16, 0, HALF_16_BIT_UNSIGNED - 1, 10);
+uniform_sample!(uniform_u16x10_6_new, u16, 0, 6u16, 10);
+
+uniform_single!(uniform_u16x10_allm1_single, u16, 0, u16::max_value(), 10);
+uniform_single!(uniform_u16x10_halfp1_single, u16, 0, HALF_16_BIT_UNSIGNED + 1, 10);
+uniform_single!(uniform_u16x10_half_single, u16, 0, HALF_16_BIT_UNSIGNED, 10);
+uniform_single!(uniform_u16x10_halfm1_single, u16, 0, HALF_16_BIT_UNSIGNED - 1, 10);
+uniform_single!(uniform_u16x10_6_single, u16, 0, 6u16, 10);
+
+
+const HALF_32_BIT_UNSIGNED: u32 = 1 << 31;
+
+uniform_sample!(uniform_u32x1_allm1_new, u32, 0, u32::max_value(), 1);
+uniform_sample!(uniform_u32x1_halfp1_new, u32, 0, HALF_32_BIT_UNSIGNED + 1, 1);
+uniform_sample!(uniform_u32x1_half_new, u32, 0, HALF_32_BIT_UNSIGNED, 1);
+uniform_sample!(uniform_u32x1_halfm1_new, u32, 0, HALF_32_BIT_UNSIGNED - 1, 1);
+uniform_sample!(uniform_u32x1_6_new, u32, 0, 6u32, 1);
+
+uniform_single!(uniform_u32x1_allm1_single, u32, 0, u32::max_value(), 1);
+uniform_single!(uniform_u32x1_halfp1_single, u32, 0, HALF_32_BIT_UNSIGNED + 1, 1);
+uniform_single!(uniform_u32x1_half_single, u32, 0, HALF_32_BIT_UNSIGNED, 1);
+uniform_single!(uniform_u32x1_halfm1_single, u32, 0, HALF_32_BIT_UNSIGNED - 1, 1);
+uniform_single!(uniform_u32x1_6_single, u32, 0, 6u32, 1);
+
+uniform_inclusive!(uniform_u32x10_all_new_inclusive, u32, 0, u32::max_value(), 10);
+uniform_sample!(uniform_u32x10_allm1_new, u32, 0, u32::max_value(), 10);
+uniform_sample!(uniform_u32x10_halfp1_new, u32, 0, HALF_32_BIT_UNSIGNED + 1, 10);
+uniform_sample!(uniform_u32x10_half_new, u32, 0, HALF_32_BIT_UNSIGNED, 10);
+uniform_sample!(uniform_u32x10_halfm1_new, u32, 0, HALF_32_BIT_UNSIGNED - 1, 10);
+uniform_sample!(uniform_u32x10_6_new, u32, 0, 6u32, 10);
+
+uniform_single!(uniform_u32x10_allm1_single, u32, 0, u32::max_value(), 10);
+uniform_single!(uniform_u32x10_halfp1_single, u32, 0, HALF_32_BIT_UNSIGNED + 1, 10);
+uniform_single!(uniform_u32x10_half_single, u32, 0, HALF_32_BIT_UNSIGNED, 10);
+uniform_single!(uniform_u32x10_halfm1_single, u32, 0, HALF_32_BIT_UNSIGNED - 1, 10);
+uniform_single!(uniform_u32x10_6_single, u32, 0, 6u32, 10);
+
+const HALF_64_BIT_UNSIGNED: u64 = 1 << 63;
+
+uniform_sample!(uniform_u64x1_allm1_new, u64, 0, u64::max_value(), 1);
+uniform_sample!(uniform_u64x1_halfp1_new, u64, 0, HALF_64_BIT_UNSIGNED + 1, 1);
+uniform_sample!(uniform_u64x1_half_new, u64, 0, HALF_64_BIT_UNSIGNED, 1);
+uniform_sample!(uniform_u64x1_halfm1_new, u64, 0, HALF_64_BIT_UNSIGNED - 1, 1);
+uniform_sample!(uniform_u64x1_6_new, u64, 0, 6u64, 1);
+
+uniform_single!(uniform_u64x1_allm1_single, u64, 0, u64::max_value(), 1);
+uniform_single!(uniform_u64x1_halfp1_single, u64, 0, HALF_64_BIT_UNSIGNED + 1, 1);
+uniform_single!(uniform_u64x1_half_single, u64, 0, HALF_64_BIT_UNSIGNED, 1);
+uniform_single!(uniform_u64x1_halfm1_single, u64, 0, HALF_64_BIT_UNSIGNED - 1, 1);
+uniform_single!(uniform_u64x1_6_single, u64, 0, 6u64, 1);
+
+uniform_inclusive!(uniform_u64x10_all_new_inclusive, u64, 0, u64::max_value(), 10);
+uniform_sample!(uniform_u64x10_allm1_new, u64, 0, u64::max_value(), 10);
+uniform_sample!(uniform_u64x10_halfp1_new, u64, 0, HALF_64_BIT_UNSIGNED + 1, 10);
+uniform_sample!(uniform_u64x10_half_new, u64, 0, HALF_64_BIT_UNSIGNED, 10);
+uniform_sample!(uniform_u64x10_halfm1_new, u64, 0, HALF_64_BIT_UNSIGNED - 1, 10);
+uniform_sample!(uniform_u64x10_6_new, u64, 0, 6u64, 10);
+
+uniform_single!(uniform_u64x10_allm1_single, u64, 0, u64::max_value(), 10);
+uniform_single!(uniform_u64x10_halfp1_single, u64, 0, HALF_64_BIT_UNSIGNED + 1, 10);
+uniform_single!(uniform_u64x10_half_single, u64, 0, HALF_64_BIT_UNSIGNED, 10);
+uniform_single!(uniform_u64x10_halfm1_single, u64, 0, HALF_64_BIT_UNSIGNED - 1, 10);
+uniform_single!(uniform_u64x10_6_single, u64, 0, 6u64, 10);
+
+const HALF_128_BIT_UNSIGNED: u128 = 1 << 127;
+
+uniform_sample!(uniform_u128x1_allm1_new, u128, 0, u128::max_value(), 1);
+uniform_sample!(uniform_u128x1_halfp1_new, u128, 0, HALF_128_BIT_UNSIGNED + 1, 1);
+uniform_sample!(uniform_u128x1_half_new, u128, 0, HALF_128_BIT_UNSIGNED, 1);
+uniform_sample!(uniform_u128x1_halfm1_new, u128, 0, HALF_128_BIT_UNSIGNED - 1, 1);
+uniform_sample!(uniform_u128x1_6_new, u128, 0, 6u128, 1);
+
+uniform_single!(uniform_u128x1_allm1_single, u128, 0, u128::max_value(), 1);
+uniform_single!(uniform_u128x1_halfp1_single, u128, 0, HALF_128_BIT_UNSIGNED + 1, 1);
+uniform_single!(uniform_u128x1_half_single, u128, 0, HALF_128_BIT_UNSIGNED, 1);
+uniform_single!(uniform_u128x1_halfm1_single, u128, 0, HALF_128_BIT_UNSIGNED - 1, 1);
+uniform_single!(uniform_u128x1_6_single, u128, 0, 6u128, 1);
+
+uniform_inclusive!(uniform_u128x10_all_new_inclusive, u128, 0, u128::max_value(), 10);
+uniform_sample!(uniform_u128x10_allm1_new, u128, 0, u128::max_value(), 10);
+uniform_sample!(uniform_u128x10_halfp1_new, u128, 0, HALF_128_BIT_UNSIGNED + 1, 10);
+uniform_sample!(uniform_u128x10_half_new, u128, 0, HALF_128_BIT_UNSIGNED, 10);
+uniform_sample!(uniform_u128x10_halfm1_new, u128, 0, HALF_128_BIT_UNSIGNED - 1, 10);
+uniform_sample!(uniform_u128x10_6_new, u128, 0, 6u128, 10);
+
+uniform_single!(uniform_u128x10_allm1_single, u128, 0, u128::max_value(), 10);
+uniform_single!(uniform_u128x10_halfp1_single, u128, 0, HALF_128_BIT_UNSIGNED + 1, 10);
+uniform_single!(uniform_u128x10_half_single, u128, 0, HALF_128_BIT_UNSIGNED, 10);
+uniform_single!(uniform_u128x10_halfm1_single, u128, 0, HALF_128_BIT_UNSIGNED - 1, 10);
+uniform_single!(uniform_u128x10_6_single, u128, 0, 6u128, 10);
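
Side note (not part of the patch): a minimal sketch of the two code paths the `_new` and `_single` benchmarks compare, assuming the same `rand`/`rand_pcg` APIs used above (`UniformInt`, `UniformSampler`, `Pcg64Mcg`); the variable names and sample range are illustrative only.

// Minimal sketch, not taken from the diff: contrast distribution reuse
// (`UniformSampler::new` + repeated `sample`) with one-off `sample_single`.
use rand::distributions::uniform::{UniformInt, UniformSampler};
use rand::prelude::*;
use rand_pcg::Pcg64Mcg;

fn main() {
    let mut rng = Pcg64Mcg::from_entropy();

    // `_new` benchmarks: pay the setup cost once, then draw many values.
    let dist = UniformInt::<u32>::new(0u32, 6u32); // half-open range [0, 6)
    let reused: u32 = dist.sample(&mut rng);

    // `_single` benchmarks: `sample_single` builds a throwaway sampler per call,
    // trading a looser rejection zone for lower setup cost.
    let one_off: u32 = UniformInt::<u32>::sample_single(0u32, 6u32, &mut rng);

    println!("reused: {}, one-off: {}", reused, one_off);
}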