20
20
use Rng ;
21
21
22
22
/// A vector of indices.
23
- ///
23
+ ///
24
24
/// Multiple internal representations are possible.
25
25
#[ derive( Clone , Debug ) ]
26
26
pub enum IndexVec {
@@ -36,9 +36,9 @@ impl IndexVec {
36
36
& IndexVec :: USize ( ref v) => v. len ( ) ,
37
37
}
38
38
}
39
-
39
+
40
40
/// Return the value at the given `index`.
41
- ///
41
+ ///
42
42
/// (Note: we cannot implement `std::ops::Index` because of lifetime
43
43
/// restrictions.)
44
44
pub fn index ( & self , index : usize ) -> usize {
@@ -63,7 +63,7 @@ impl IndexVec {
63
63
& IndexVec :: USize ( ref v) => IndexVecIter :: USize ( v. iter ( ) ) ,
64
64
}
65
65
}
66
-
66
+
67
67
/// Convert into an iterator over the indices as a sequence of `usize` values
68
68
pub fn into_iter ( self ) -> IndexVecIntoIter {
69
69
match self {
@@ -115,7 +115,7 @@ impl<'a> Iterator for IndexVecIter<'a> {
115
115
& mut USize ( ref mut iter) => iter. next ( ) . cloned ( ) ,
116
116
}
117
117
}
118
-
118
+
119
119
fn size_hint ( & self ) -> ( usize , Option < usize > ) {
120
120
match self {
121
121
& IndexVecIter :: U32 ( ref v) => v. size_hint ( ) ,
@@ -135,15 +135,15 @@ pub enum IndexVecIntoIter {
135
135
136
136
impl Iterator for IndexVecIntoIter {
137
137
type Item = usize ;
138
-
138
+
139
139
fn next ( & mut self ) -> Option < Self :: Item > {
140
140
use self :: IndexVecIntoIter :: * ;
141
141
match self {
142
142
& mut U32 ( ref mut v) => v. next ( ) . map ( |i| i as usize ) ,
143
143
& mut USize ( ref mut v) => v. next ( ) ,
144
144
}
145
145
}
146
-
146
+
147
147
fn size_hint ( & self ) -> ( usize , Option < usize > ) {
148
148
use self :: IndexVecIntoIter :: * ;
149
149
match self {
@@ -191,7 +191,7 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
191
191
}
192
192
let amount = amount as u32 ;
193
193
let length = length as u32 ;
194
-
194
+
195
195
// Choice of algorithm here depends on both length and amount. See:
196
196
// https://github.com/rust-random/rand/pull/479
197
197
// We do some calculations with f32. Accuracy is not very important.
@@ -222,7 +222,7 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
222
222
223
223
/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's
224
224
/// combination algorithm.
225
- ///
225
+ ///
226
226
/// The output values are fully shuffled. (Overhead is under 50%.)
227
227
///
228
228
/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
@@ -233,7 +233,7 @@ fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
233
233
// amounts this is slow due to Vec::insert performance, so we shuffle
234
234
// afterwards. Benchmarks show little overhead from extra logic.
235
235
let floyd_shuffle = amount < 50 ;
236
-
236
+
237
237
debug_assert ! ( amount <= length) ;
238
238
let mut indices = Vec :: with_capacity ( amount as usize ) ;
239
239
for j in length - amount .. length {
@@ -267,7 +267,7 @@ fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
267
267
///
268
268
/// This allocates the entire `length` of indices and randomizes only the first `amount`.
269
269
/// It then truncates to `amount` and returns.
270
- ///
270
+ ///
271
271
/// This method is not appropriate for large `length` and potentially uses a lot
272
272
/// of memory; because of this we only implement for `u32` index (which improves
273
273
/// performance in all cases).
@@ -309,19 +309,19 @@ fn sample_rejection<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
309
309
}
310
310
indices. push ( pos) ;
311
311
}
312
-
312
+
313
313
debug_assert_eq ! ( indices. len( ) , amount) ;
314
314
IndexVec :: from ( indices)
315
315
}
316
316
317
317
#[ cfg( test) ]
318
318
mod test {
319
319
use super :: * ;
320
-
320
+
321
321
#[ test]
322
322
fn test_sample_boundaries ( ) {
323
323
let mut r = :: test:: rng ( 404 ) ;
324
-
324
+
325
325
assert_eq ! ( sample_inplace( & mut r, 0 , 0 ) . len( ) , 0 ) ;
326
326
assert_eq ! ( sample_inplace( & mut r, 1 , 0 ) . len( ) , 0 ) ;
327
327
assert_eq ! ( sample_inplace( & mut r, 1 , 1 ) . into_vec( ) , vec![ 0 ] ) ;
@@ -331,43 +331,43 @@ mod test {
331
331
assert_eq ! ( sample_floyd( & mut r, 0 , 0 ) . len( ) , 0 ) ;
332
332
assert_eq ! ( sample_floyd( & mut r, 1 , 0 ) . len( ) , 0 ) ;
333
333
assert_eq ! ( sample_floyd( & mut r, 1 , 1 ) . into_vec( ) , vec![ 0 ] ) ;
334
-
334
+
335
335
// These algorithms should be fast with big numbers. Test average.
336
336
let sum: usize = sample_rejection ( & mut r, 1 << 25 , 10 )
337
337
. into_iter ( ) . sum ( ) ;
338
338
assert ! ( 1 << 25 < sum && sum < ( 1 << 25 ) * 25 ) ;
339
-
339
+
340
340
let sum: usize = sample_floyd ( & mut r, 1 << 25 , 10 )
341
341
. into_iter ( ) . sum ( ) ;
342
342
assert ! ( 1 << 25 < sum && sum < ( 1 << 25 ) * 25 ) ;
343
343
}
344
-
344
+
345
345
#[ test]
346
346
fn test_sample_alg ( ) {
347
347
let seed_rng = :: test:: rng;
348
348
349
349
// We can't test which algorithm is used directly, but Floyd's alg
350
350
// should produce different results from the others. (Also, `inplace`
351
351
// and `cached` currently use different sizes thus produce different results.)
352
-
352
+
353
353
// A small length and relatively large amount should use inplace
354
354
let ( length, amount) : ( usize , usize ) = ( 100 , 50 ) ;
355
355
let v1 = sample ( & mut seed_rng ( 420 ) , length, amount) ;
356
356
let v2 = sample_inplace ( & mut seed_rng ( 420 ) , length as u32 , amount as u32 ) ;
357
357
assert ! ( v1. iter( ) . all( |e| e < length) ) ;
358
358
assert_eq ! ( v1, v2) ;
359
-
359
+
360
360
// Test Floyd's alg does produce different results
361
361
let v3 = sample_floyd ( & mut seed_rng ( 420 ) , length as u32 , amount as u32 ) ;
362
362
assert ! ( v1 != v3) ;
363
-
363
+
364
364
// A large length and small amount should use Floyd
365
365
let ( length, amount) : ( usize , usize ) = ( 1 <<20 , 50 ) ;
366
366
let v1 = sample ( & mut seed_rng ( 421 ) , length, amount) ;
367
367
let v2 = sample_floyd ( & mut seed_rng ( 421 ) , length as u32 , amount as u32 ) ;
368
368
assert ! ( v1. iter( ) . all( |e| e < length) ) ;
369
369
assert_eq ! ( v1, v2) ;
370
-
370
+
371
371
// A large length and larger amount should use cache
372
372
let ( length, amount) : ( usize , usize ) = ( 1 <<20 , 600 ) ;
373
373
let v1 = sample ( & mut seed_rng ( 422 ) , length, amount) ;
0 commit comments