@@ -158,21 +158,15 @@ impl Iterator for IndexVecIntoIter {
158
158
// Marker impl with no overridden methods: `ExactSizeIterator::len()` falls back
// to its default, which is derived from the `Iterator::size_hint()` of
// `IndexVecIntoIter`.
// NOTE(review): `size_hint` is defined outside this view — confirm it reports
// exact (equal lower/upper) bounds, as this trait requires.
impl ExactSizeIterator for IndexVecIntoIter { }
159
159
160
160
161
- /// Randomly sample exactly `amount` distinct indices from `0..length`.
162
- ///
163
- /// If `shuffled == true` then the sampled values will be fully shuffled;
164
- /// otherwise the values may only partially shuffled, depending on the
165
- /// algorithm used (i.e. biases may exist in the ordering of sampled elements).
166
- /// Depending on the algorithm used internally, full shuffling may add
167
- /// significant overhead for `amount` > 10 or so, but not more than double
168
- /// the time and often much less.
161
+ /// Randomly sample exactly `amount` distinct indices from `0..length`, and
162
+ /// return them in random order (fully shuffled).
169
163
///
170
164
/// This method is used internally by the slice sampling methods, but it can
171
165
/// sometimes be useful to have the indices themselves so this is provided as
172
166
/// an alternative.
173
167
///
174
168
/// The implementation used is not specified; we automatically select the
175
- /// fastest available implementation for the `length` and `amount` parameters
169
+ /// fastest available algorithm for the `length` and `amount` parameters
176
170
/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking,
177
171
/// complexity is `O(amount)`, except that when `amount` is small, performance
178
172
/// is closer to `O(amount^2)`, and when `length` is close to `amount` then
@@ -186,8 +180,7 @@ impl ExactSizeIterator for IndexVecIntoIter {}
186
180
/// to adapt the internal `sample_floyd` implementation.
187
181
///
188
182
/// Panics if `amount > length`.
189
- pub fn sample < R > ( rng : & mut R , length : usize , amount : usize ,
190
- shuffled : bool ) -> IndexVec
183
+ pub fn sample < R > ( rng : & mut R , length : usize , amount : usize ) -> IndexVec
191
184
where R : Rng + ?Sized ,
192
185
{
193
186
if amount > length {
@@ -205,16 +198,16 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize,
205
198
// https://github.com/rust-lang-nursery/rand/pull/479
206
199
// We do some calculations with f32. Accuracy is not very important.
207
200
208
- if amount < 217 {
209
- const C : [ [ f32 ; 2 ] ; 2 ] = [ [ 1.2 , 6 .0 /45.0 ] , [ 10.0 , 70.0 /9.0 ] ] ;
201
+ if amount < 163 {
202
+ const C : [ [ f32 ; 2 ] ; 2 ] = [ [ 1.6 , 8 .0 /45.0 ] , [ 10.0 , 70.0 /9.0 ] ] ;
210
203
let j = if length < 500_000 { 0 } else { 1 } ;
211
204
let amount_fp = amount as f32 ;
212
205
let m4 = C [ 0 ] [ j] * amount_fp;
213
206
// Short-cut: when amount < 12, floyd's is always faster
214
207
if amount > 11 && ( length as f32 ) < ( C [ 1 ] [ j] + m4) * amount_fp {
215
208
sample_inplace ( rng, length, amount)
216
209
} else {
217
- sample_floyd ( rng, length, amount, shuffled )
210
+ sample_floyd ( rng, length, amount)
218
211
}
219
212
} else {
220
213
const C : [ f32 ; 2 ] = [ 270.0 , 330.0 /9.0 ] ;
@@ -232,29 +225,50 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize,
232
225
/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's
233
226
/// combination algorithm.
234
227
///
235
- /// If `shuffled == false`, the values are only partially shuffled (i.e. biases
236
- /// exist in the ordering of sampled elements). If `shuffled == true`, the
237
- /// values are fully shuffled.
228
+ /// The output values are fully shuffled. (Overhead is under 50%.)
238
229
///
239
230
/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
240
- fn sample_floyd < R > ( rng : & mut R , length : u32 , amount : u32 , shuffled : bool ) -> IndexVec
231
+ fn sample_floyd < R > ( rng : & mut R , length : u32 , amount : u32 ) -> IndexVec
241
232
where R : Rng + ?Sized ,
242
233
{
234
+ // Shouldn't this be on std::slice?
235
+ fn find_pos < T : Copy + PartialEq < T > > ( slice : & [ T ] , elt : T ) -> Option < usize > {
236
+ for i in 0 ..slice. len ( ) {
237
+ if slice[ i] == elt {
238
+ return Some ( i) ;
239
+ }
240
+ }
241
+ None
242
+ }
243
+
244
+ // For small amount we use Floyd's fully-shuffled variant. For larger
245
+ // amounts this is slow due to Vec::insert performance, so we shuffle
246
+ // afterwards. Benchmarks show little overhead from extra logic.
247
+ let floyd_shuffle = amount < 50 ;
248
+
243
249
debug_assert ! ( amount <= length) ;
244
250
let mut indices = Vec :: with_capacity ( amount as usize ) ;
245
251
for j in length - amount .. length {
246
252
let t = rng. gen_range ( 0 , j + 1 ) ;
247
- if indices. contains ( & t) {
248
- indices. push ( j)
253
+ if floyd_shuffle {
254
+ if let Some ( pos) = find_pos ( & indices, t) {
255
+ indices. insert ( pos, j) ;
256
+ continue ;
257
+ }
249
258
} else {
250
- indices. push ( t)
251
- } ;
259
+ if indices. contains ( & t) {
260
+ indices. push ( j) ;
261
+ continue ;
262
+ }
263
+ }
264
+ indices. push ( t) ;
252
265
}
253
- if shuffled {
254
- // Note that there is a variant of Floyd's algorithm with native full
255
- // shuffling, but it is slow because it requires arbitrary insertions.
256
- use super :: SliceRandom ;
257
- indices. shuffle ( rng) ;
266
+ if !floyd_shuffle {
267
+ // Reimplement SliceRandom::shuffle with smaller indices
268
+ for i in ( 1 ..amount) . rev ( ) {
269
+ // invariant: elements with index > i have been locked in place.
270
+ indices. swap ( i as usize , rng. gen_range ( 0 , i + 1 ) as usize ) ;
271
+ }
258
272
}
259
273
IndexVec :: from ( indices)
260
274
}
@@ -270,9 +284,7 @@ fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> Ind
270
284
/// of memory; because of this we only implement for `u32` index (which improves
271
285
/// performance in all cases).
272
286
///
273
- /// This is likely the fastest for small lengths since it avoids the need for
274
- /// allocations. Set-up is `O(length)` time and memory and shuffling is
275
- /// `O(amount)` time.
287
+ /// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
276
288
fn sample_inplace < R > ( rng : & mut R , length : u32 , amount : u32 ) -> IndexVec
277
289
where R : Rng + ?Sized ,
278
290
{
@@ -330,16 +342,16 @@ mod test {
330
342
331
343
assert_eq ! ( sample_rejection( & mut r, 1 , 0 ) . len( ) , 0 ) ;
332
344
333
- assert_eq ! ( sample_floyd( & mut r, 0 , 0 , false ) . len( ) , 0 ) ;
334
- assert_eq ! ( sample_floyd( & mut r, 1 , 0 , false ) . len( ) , 0 ) ;
335
- assert_eq ! ( sample_floyd( & mut r, 1 , 1 , false ) . into_vec( ) , vec![ 0 ] ) ;
345
+ assert_eq ! ( sample_floyd( & mut r, 0 , 0 ) . len( ) , 0 ) ;
346
+ assert_eq ! ( sample_floyd( & mut r, 1 , 0 ) . len( ) , 0 ) ;
347
+ assert_eq ! ( sample_floyd( & mut r, 1 , 1 ) . into_vec( ) , vec![ 0 ] ) ;
336
348
337
349
// These algorithms should be fast with big numbers. Test average.
338
350
let sum: usize = sample_rejection ( & mut r, 1 << 25 , 10 )
339
351
. into_iter ( ) . sum ( ) ;
340
352
assert ! ( 1 << 25 < sum && sum < ( 1 << 25 ) * 25 ) ;
341
353
342
- let sum: usize = sample_floyd ( & mut r, 1 << 25 , 10 , false )
354
+ let sum: usize = sample_floyd ( & mut r, 1 << 25 , 10 )
343
355
. into_iter ( ) . sum ( ) ;
344
356
assert ! ( 1 << 25 < sum && sum < ( 1 << 25 ) * 25 ) ;
345
357
}
@@ -358,27 +370,27 @@ mod test {
358
370
// A small length and relatively large amount should use inplace
359
371
r. fill ( & mut seed) ;
360
372
let ( length, amount) : ( usize , usize ) = ( 100 , 50 ) ;
361
- let v1 = sample ( & mut xor_rng ( seed) , length, amount, true ) ;
373
+ let v1 = sample ( & mut xor_rng ( seed) , length, amount) ;
362
374
let v2 = sample_inplace ( & mut xor_rng ( seed) , length as u32 , amount as u32 ) ;
363
375
assert ! ( v1. iter( ) . all( |e| e < length) ) ;
364
376
assert_eq ! ( v1, v2) ;
365
377
366
378
// Test Floyd's alg does produce different results
367
- let v3 = sample_floyd ( & mut xor_rng ( seed) , length as u32 , amount as u32 , true ) ;
379
+ let v3 = sample_floyd ( & mut xor_rng ( seed) , length as u32 , amount as u32 ) ;
368
380
assert ! ( v1 != v3) ;
369
381
370
382
// A large length and small amount should use Floyd
371
383
r. fill ( & mut seed) ;
372
384
let ( length, amount) : ( usize , usize ) = ( 1 <<20 , 50 ) ;
373
- let v1 = sample ( & mut xor_rng ( seed) , length, amount, true ) ;
374
- let v2 = sample_floyd ( & mut xor_rng ( seed) , length as u32 , amount as u32 , true ) ;
385
+ let v1 = sample ( & mut xor_rng ( seed) , length, amount) ;
386
+ let v2 = sample_floyd ( & mut xor_rng ( seed) , length as u32 , amount as u32 ) ;
375
387
assert ! ( v1. iter( ) . all( |e| e < length) ) ;
376
388
assert_eq ! ( v1, v2) ;
377
389
378
390
// A large length and larger amount should use cache
379
391
r. fill ( & mut seed) ;
380
392
let ( length, amount) : ( usize , usize ) = ( 1 <<20 , 600 ) ;
381
- let v1 = sample ( & mut xor_rng ( seed) , length, amount, true ) ;
393
+ let v1 = sample ( & mut xor_rng ( seed) , length, amount) ;
382
394
let v2 = sample_rejection ( & mut xor_rng ( seed) , length, amount) ;
383
395
assert ! ( v1. iter( ) . all( |e| e < length) ) ;
384
396
assert_eq ! ( v1, v2) ;
0 commit comments