Skip to content

Commit 19897e5

Browse files
committed
sample_indices: always shuffle. Floyd's alg: optimise.
1 parent 805022c commit 19897e5

File tree

3 files changed

+64
-59
lines changed

3 files changed

+64
-59
lines changed

benches/seq.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ macro_rules! seq_slice_choose_multiple {
3939
// Collect full result to prevent unwanted shortcuts getting
4040
// first element (in case sample_indices returns an iterator).
4141
for (slot, sample) in result.iter_mut().zip(
42-
x.choose_multiple(&mut rng, $amount, false)) {
42+
x.choose_multiple(&mut rng, $amount)) {
4343
*slot = *sample;
4444
}
4545
result[$amount-1]
@@ -87,7 +87,7 @@ macro_rules! sample_indices {
8787
fn $name(b: &mut Bencher) {
8888
let mut rng = SmallRng::from_rng(thread_rng()).unwrap();
8989
b.iter(|| {
90-
index::$fn(&mut rng, $length, $amount, false)
90+
index::$fn(&mut rng, $length, $amount)
9191
})
9292
}
9393
}
@@ -98,5 +98,6 @@ sample_indices!(misc_sample_indices_10_of_1k, sample, 10, 1000);
9898
sample_indices!(misc_sample_indices_100_of_1k, sample, 100, 1000);
9999
sample_indices!(misc_sample_indices_100_of_1M, sample, 100, 1000_000);
100100
sample_indices!(misc_sample_indices_100_of_1G, sample, 100, 1000_000_000);
101+
sample_indices!(misc_sample_indices_200_of_1G, sample, 200, 1000_000_000);
101102
sample_indices!(misc_sample_indices_400_of_1G, sample, 400, 1000_000_000);
102103
sample_indices!(misc_sample_indices_600_of_1G, sample, 600, 1000_000_000);

src/seq/index.rs

Lines changed: 51 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -158,21 +158,15 @@ impl Iterator for IndexVecIntoIter {
158158
impl ExactSizeIterator for IndexVecIntoIter {}
159159

160160

161-
/// Randomly sample exactly `amount` distinct indices from `0..length`.
162-
///
163-
/// If `shuffled == true` then the sampled values will be fully shuffled;
164-
/// otherwise the values may only partially shuffled, depending on the
165-
/// algorithm used (i.e. biases may exist in the ordering of sampled elements).
166-
/// Depending on the algorithm used internally, full shuffling may add
167-
/// significant overhead for `amount` > 10 or so, but not more than double
168-
/// the time and often much less.
161+
/// Randomly sample exactly `amount` distinct indices from `0..length`, and
162+
/// return them in random order (fully shuffled).
169163
///
170164
/// This method is used internally by the slice sampling methods, but it can
171165
/// sometimes be useful to have the indices themselves so this is provided as
172166
/// an alternative.
173167
///
174168
/// The implementation used is not specified; we automatically select the
175-
/// fastest available implementation for the `length` and `amount` parameters
169+
/// fastest available algorithm for the `length` and `amount` parameters
176170
/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking,
177171
/// complexity is `O(amount)`, except that when `amount` is small, performance
178172
/// is closer to `O(amount^2)`, and when `length` is close to `amount` then
@@ -186,8 +180,7 @@ impl ExactSizeIterator for IndexVecIntoIter {}
186180
/// to adapt the internal `sample_floyd` implementation.
187181
///
188182
/// Panics if `amount > length`.
189-
pub fn sample<R>(rng: &mut R, length: usize, amount: usize,
190-
shuffled: bool) -> IndexVec
183+
pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
191184
where R: Rng + ?Sized,
192185
{
193186
if amount > length {
@@ -205,16 +198,16 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize,
205198
// https://github.com/rust-lang-nursery/rand/pull/479
206199
// We do some calculations with f32. Accuracy is not very important.
207200

208-
if amount < 217 {
209-
const C: [[f32; 2]; 2] = [[1.2, 6.0/45.0], [10.0, 70.0/9.0]];
201+
if amount < 163 {
202+
const C: [[f32; 2]; 2] = [[1.6, 8.0/45.0], [10.0, 70.0/9.0]];
210203
let j = if length < 500_000 { 0 } else { 1 };
211204
let amount_fp = amount as f32;
212205
let m4 = C[0][j] * amount_fp;
213206
// Short-cut: when amount < 12, floyd's is always faster
214207
if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp {
215208
sample_inplace(rng, length, amount)
216209
} else {
217-
sample_floyd(rng, length, amount, shuffled)
210+
sample_floyd(rng, length, amount)
218211
}
219212
} else {
220213
const C: [f32; 2] = [270.0, 330.0/9.0];
@@ -232,29 +225,50 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize,
232225
/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's
233226
/// combination algorithm.
234227
///
235-
/// If `shuffled == false`, the values are only partially shuffled (i.e. biases
236-
/// exist in the ordering of sampled elements). If `shuffled == true`, the
237-
/// values are fully shuffled.
228+
/// The output values are fully shuffled. (Overhead is under 50%.)
238229
///
239230
/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
240-
fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> IndexVec
231+
fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
241232
where R: Rng + ?Sized,
242233
{
234+
// Shouldn't this be on std::slice?
235+
fn find_pos<T: Copy + PartialEq<T>>(slice: &[T], elt: T) -> Option<usize> {
236+
for i in 0..slice.len() {
237+
if slice[i] == elt {
238+
return Some(i);
239+
}
240+
}
241+
None
242+
}
243+
244+
// For small amount we use Floyd's fully-shuffled variant. For larger
245+
// amounts this is slow due to Vec::insert performance, so we shuffle
246+
// afterwards. Benchmarks show little overhead from extra logic.
247+
let floyd_shuffle = amount < 50;
248+
243249
debug_assert!(amount <= length);
244250
let mut indices = Vec::with_capacity(amount as usize);
245251
for j in length - amount .. length {
246252
let t = rng.gen_range(0, j + 1);
247-
if indices.contains(&t) {
248-
indices.push(j)
253+
if floyd_shuffle {
254+
if let Some(pos) = find_pos(&indices, t) {
255+
indices.insert(pos, j);
256+
continue;
257+
}
249258
} else {
250-
indices.push(t)
251-
};
259+
if indices.contains(&t) {
260+
indices.push(j);
261+
continue;
262+
}
263+
}
264+
indices.push(t);
252265
}
253-
if shuffled {
254-
// Note that there is a variant of Floyd's algorithm with native full
255-
// shuffling, but it is slow because it requires arbitrary insertions.
256-
use super::SliceRandom;
257-
indices.shuffle(rng);
266+
if !floyd_shuffle {
267+
// Reimplement SliceRandom::shuffle with smaller indices
268+
for i in (1..amount).rev() {
269+
// invariant: elements with index > i have been locked in place.
270+
indices.swap(i as usize, rng.gen_range(0, i + 1) as usize);
271+
}
258272
}
259273
IndexVec::from(indices)
260274
}
@@ -270,9 +284,7 @@ fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> Ind
270284
/// of memory; because of this we only implement for `u32` index (which improves
271285
/// performance in all cases).
272286
///
273-
/// This is likely the fastest for small lengths since it avoids the need for
274-
/// allocations. Set-up is `O(length)` time and memory and shuffling is
275-
/// `O(amount)` time.
287+
/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
276288
fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
277289
where R: Rng + ?Sized,
278290
{
@@ -330,16 +342,16 @@ mod test {
330342

331343
assert_eq!(sample_rejection(&mut r, 1, 0).len(), 0);
332344

333-
assert_eq!(sample_floyd(&mut r, 0, 0, false).len(), 0);
334-
assert_eq!(sample_floyd(&mut r, 1, 0, false).len(), 0);
335-
assert_eq!(sample_floyd(&mut r, 1, 1, false).into_vec(), vec![0]);
345+
assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0);
346+
assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0);
347+
assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]);
336348

337349
// These algorithms should be fast with big numbers. Test average.
338350
let sum: usize = sample_rejection(&mut r, 1 << 25, 10)
339351
.into_iter().sum();
340352
assert!(1 << 25 < sum && sum < (1 << 25) * 25);
341353

342-
let sum: usize = sample_floyd(&mut r, 1 << 25, 10, false)
354+
let sum: usize = sample_floyd(&mut r, 1 << 25, 10)
343355
.into_iter().sum();
344356
assert!(1 << 25 < sum && sum < (1 << 25) * 25);
345357
}
@@ -358,27 +370,27 @@ mod test {
358370
// A small length and relatively large amount should use inplace
359371
r.fill(&mut seed);
360372
let (length, amount): (usize, usize) = (100, 50);
361-
let v1 = sample(&mut xor_rng(seed), length, amount, true);
373+
let v1 = sample(&mut xor_rng(seed), length, amount);
362374
let v2 = sample_inplace(&mut xor_rng(seed), length as u32, amount as u32);
363375
assert!(v1.iter().all(|e| e < length));
364376
assert_eq!(v1, v2);
365377

366378
// Test Floyd's alg does produce different results
367-
let v3 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32, true);
379+
let v3 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32);
368380
assert!(v1 != v3);
369381

370382
// A large length and small amount should use Floyd
371383
r.fill(&mut seed);
372384
let (length, amount): (usize, usize) = (1<<20, 50);
373-
let v1 = sample(&mut xor_rng(seed), length, amount, true);
374-
let v2 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32, true);
385+
let v1 = sample(&mut xor_rng(seed), length, amount);
386+
let v2 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32);
375387
assert!(v1.iter().all(|e| e < length));
376388
assert_eq!(v1, v2);
377389

378390
// A large length and larger amount should use cache
379391
r.fill(&mut seed);
380392
let (length, amount): (usize, usize) = (1<<20, 600);
381-
let v1 = sample(&mut xor_rng(seed), length, amount, true);
393+
let v1 = sample(&mut xor_rng(seed), length, amount);
382394
let v2 = sample_rejection(&mut xor_rng(seed), length, amount);
383395
assert!(v1.iter().all(|e| e < length));
384396
assert_eq!(v1, v2);

src/seq/mod.rs

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -58,18 +58,11 @@ pub trait SliceRandom {
5858
where R: Rng + ?Sized;
5959

6060
/// Produces an iterator that chooses `amount` elements from the slice at
61-
/// random without repeating any.
62-
///
61+
/// random without repeating any, and returns them in random order.
62+
///
6363
/// In case this API is not sufficiently flexible, use `index::sample` then
6464
/// apply the indices to the slice.
6565
///
66-
/// If `shuffled == true` then the sampled values will be fully shuffled;
67-
/// otherwise the values may only partially shuffled, depending on the
68-
/// algorithm used (i.e. biases may exist in the ordering of sampled
69-
/// elements). Depending on the algorithm used internally, full shuffling
70-
/// may add significant overhead for `amount` > 10 or so, but not more
71-
/// than double the time and often much less.
72-
///
7366
/// Complexity is expected to be the same as `index::sample`.
7467
///
7568
/// # Example
@@ -80,16 +73,16 @@ pub trait SliceRandom {
8073
/// let sample = "Hello, audience!".as_bytes();
8174
///
8275
/// // collect the results into a vector:
83-
/// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3, true).cloned().collect();
76+
/// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3).cloned().collect();
8477
///
8578
/// // store in a buffer:
8679
/// let mut buf = [0u8; 5];
87-
/// for (b, slot) in sample.choose_multiple(&mut rng, buf.len(), true).zip(buf.iter_mut()) {
80+
/// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) {
8881
/// *slot = *b;
8982
/// }
9083
/// ```
9184
#[cfg(feature = "alloc")]
92-
fn choose_multiple<R>(&self, rng: &mut R, amount: usize, shuffled: bool) -> SliceChooseIter<Self, Self::Item>
85+
fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item>
9386
where R: Rng + ?Sized;
9487

9588
/// Similar to [`choose`], where the likelihood of each outcome may be
@@ -315,15 +308,15 @@ impl<T> SliceRandom for [T] {
315308
}
316309

317310
#[cfg(feature = "alloc")]
318-
fn choose_multiple<R>(&self, rng: &mut R, amount: usize, shuffled: bool)
311+
fn choose_multiple<R>(&self, rng: &mut R, amount: usize)
319312
-> SliceChooseIter<Self, Self::Item>
320313
where R: Rng + ?Sized
321314
{
322315
let amount = ::core::cmp::min(amount, self.len());
323316
SliceChooseIter {
324317
slice: self,
325318
_phantom: Default::default(),
326-
indices: index::sample(rng, self.len(), amount, shuffled).into_iter(),
319+
indices: index::sample(rng, self.len(), amount).into_iter(),
327320
}
328321
}
329322

@@ -460,7 +453,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T>
460453
where R: Rng + ?Sized,
461454
T: Clone
462455
{
463-
let indices = index::sample(rng, slice.len(), amount, true).into_iter();
456+
let indices = index::sample(rng, slice.len(), amount).into_iter();
464457

465458
let mut out = Vec::with_capacity(amount);
466459
out.extend(indices.map(|i| slice[i].clone()));
@@ -483,7 +476,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T>
483476
pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T>
484477
where R: Rng + ?Sized
485478
{
486-
let indices = index::sample(rng, slice.len(), amount, true).into_iter();
479+
let indices = index::sample(rng, slice.len(), amount).into_iter();
487480

488481
let mut out = Vec::with_capacity(amount);
489482
out.extend(indices.map(|i| &slice[i]));
@@ -679,8 +672,7 @@ mod test {
679672
r.fill(&mut seed);
680673

681674
// assert the basics work
682-
let regular = index::sample(
683-
&mut xor_rng(seed), length, amount, true);
675+
let regular = index::sample(&mut xor_rng(seed), length, amount);
684676
assert_eq!(regular.len(), amount);
685677
assert!(regular.iter().all(|e| e < length));
686678

0 commit comments

Comments
 (0)