Skip to content

Commit b695ed3

Browse files
authored
Rollup merge of #103446 - the8472:tra-array-chunks, r=Mark-Simulacrum
Specialize `iter::ArrayChunks::fold` for TrustedRandomAccess iterators ``` OLD: test iter::bench_trusted_random_access_chunks ... bench: 368 ns/iter (+/- 4) NEW: test iter::bench_trusted_random_access_chunks ... bench: 30 ns/iter (+/- 0) ``` The resulting assembly is similar to #103166 but the specialization kicks in under different (partially overlapping) conditions compared to that PR. They're complementary. In principle a TRA-based specialization could be applied to all `ArrayChunks` methods, including `next()` as we do for `Zip` but that would have all the same hazards as the Zip specialization. Only doing it for `fold` is far less hazardous. The downside is that it only helps with internal, exhaustive iteration. I.e. `for _ in` or `try_fold` will not benefit. Note that the regular, `try_fold`-based and the specialized `fold()` impl have observably slightly different behavior. Namely the specialized variant does not fetch the remainder elements from the underlying iterator. We do have a few other places in the standard library where beyond-the-end-of-iteration side-effects are being elided under some circumstances but not others. Inspired by https://old.reddit.com/r/rust/comments/yaft60/zerocost_iterator_abstractionsnot_so_zerocost/
2 parents 6184a96 + 3925fc0 commit b695ed3

File tree

5 files changed

+148
-29
lines changed

5 files changed

+148
-29
lines changed

library/core/benches/iter.rs

+21-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use core::borrow::Borrow;
12
use core::iter::*;
23
use core::mem;
34
use core::num::Wrapping;
@@ -403,13 +404,31 @@ fn bench_trusted_random_access_adapters(b: &mut Bencher) {
403404

404405
/// Exercises the iter::Copied specialization for slice::Iter
405406
#[bench]
406-
fn bench_copied_array_chunks(b: &mut Bencher) {
407+
fn bench_copied_chunks(b: &mut Bencher) {
408+
let v = vec![1u8; 1024];
409+
410+
b.iter(|| {
411+
let mut iter = black_box(&v).iter().copied();
412+
let mut acc = Wrapping(0);
413+
// This uses a while-let loop to side-step the TRA specialization in ArrayChunks
414+
while let Ok(chunk) = iter.next_chunk::<{ mem::size_of::<u64>() }>() {
415+
let d = u64::from_ne_bytes(chunk);
416+
acc += Wrapping(d.rotate_left(7).wrapping_add(1));
417+
}
418+
acc
419+
})
420+
}
421+
422+
/// Exercises the TrustedRandomAccess specialization in ArrayChunks
423+
#[bench]
424+
fn bench_trusted_random_access_chunks(b: &mut Bencher) {
407425
let v = vec![1u8; 1024];
408426

409427
b.iter(|| {
410428
black_box(&v)
411429
.iter()
412-
.copied()
430+
// this shows that we're not relying on the slice::Iter specialization in Copied
431+
.map(|b| *b.borrow())
413432
.array_chunks::<{ mem::size_of::<u64>() }>()
414433
.map(|ary| {
415434
let d = u64::from_ne_bytes(ary);

library/core/benches/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#![feature(test)]
66
#![feature(trusted_random_access)]
77
#![feature(iter_array_chunks)]
8+
#![feature(iter_next_chunk)]
89

910
extern crate test;
1011

library/core/src/array/mod.rs

+52-23
Original file line numberDiff line numberDiff line change
@@ -865,24 +865,6 @@ where
865865
return Ok(Try::from_output(unsafe { mem::zeroed() }));
866866
}
867867

868-
struct Guard<'a, T, const N: usize> {
869-
array_mut: &'a mut [MaybeUninit<T>; N],
870-
initialized: usize,
871-
}
872-
873-
impl<T, const N: usize> Drop for Guard<'_, T, N> {
874-
fn drop(&mut self) {
875-
debug_assert!(self.initialized <= N);
876-
877-
// SAFETY: this slice will contain only initialized objects.
878-
unsafe {
879-
crate::ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(
880-
&mut self.array_mut.get_unchecked_mut(..self.initialized),
881-
));
882-
}
883-
}
884-
}
885-
886868
let mut array = MaybeUninit::uninit_array::<N>();
887869
let mut guard = Guard { array_mut: &mut array, initialized: 0 };
888870

@@ -896,13 +878,11 @@ where
896878
ControlFlow::Continue(elem) => elem,
897879
};
898880

899-
// SAFETY: `guard.initialized` starts at 0, is increased by one in the
900-
// loop and the loop is aborted once it reaches N (which is
901-
// `array.len()`).
881+
// SAFETY: `guard.initialized` starts at 0, which means push can be called
882+
// at most N times, which this loop does.
902883
unsafe {
903-
guard.array_mut.get_unchecked_mut(guard.initialized).write(item);
884+
guard.push_unchecked(item);
904885
}
905-
guard.initialized += 1;
906886
}
907887
None => {
908888
let alive = 0..guard.initialized;
@@ -920,6 +900,55 @@ where
920900
Ok(Try::from_output(output))
921901
}
922902

903+
/// Panic guard for incremental initialization of arrays.
904+
///
905+
/// Disarm the guard with `mem::forget` once the array has been initialized.
906+
///
907+
/// # Safety
908+
///
909+
/// All write accesses to this structure are unsafe and must maintain a correct
910+
/// count of `initialized` elements.
911+
///
912+
/// To minimize indirection fields are still pub but callers should at least use
913+
/// `push_unchecked` to signal that something unsafe is going on.
914+
pub(crate) struct Guard<'a, T, const N: usize> {
915+
/// The array to be initialized.
916+
pub array_mut: &'a mut [MaybeUninit<T>; N],
917+
/// The number of items that have been initialized so far.
918+
pub initialized: usize,
919+
}
920+
921+
impl<T, const N: usize> Guard<'_, T, N> {
922+
/// Adds an item to the array and updates the initialized item counter.
923+
///
924+
/// # Safety
925+
///
926+
/// No more than N elements must be initialized.
927+
#[inline]
928+
pub unsafe fn push_unchecked(&mut self, item: T) {
929+
// SAFETY: If `initialized` was correct before and the caller does not
930+
// invoke this method more than N times then writes will be in-bounds
931+
// and slots will not be initialized more than once.
932+
unsafe {
933+
self.array_mut.get_unchecked_mut(self.initialized).write(item);
934+
self.initialized = self.initialized.unchecked_add(1);
935+
}
936+
}
937+
}
938+
939+
impl<T, const N: usize> Drop for Guard<'_, T, N> {
940+
fn drop(&mut self) {
941+
debug_assert!(self.initialized <= N);
942+
943+
// SAFETY: this slice will contain only initialized objects.
944+
unsafe {
945+
crate::ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(
946+
&mut self.array_mut.get_unchecked_mut(..self.initialized),
947+
));
948+
}
949+
}
950+
}
951+
923952
/// Returns the next chunk of `N` items from the iterator or errors with an
924953
/// iterator over the remainder. Used for `Iterator::next_chunk`.
925954
#[inline]

library/core/src/iter/adapters/array_chunks.rs

+72-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use crate::array;
2-
use crate::iter::{ByRefSized, FusedIterator, Iterator};
3-
use crate::ops::{ControlFlow, Try};
2+
use crate::const_closure::ConstFnMutClosure;
3+
use crate::iter::{ByRefSized, FusedIterator, Iterator, TrustedRandomAccessNoCoerce};
4+
use crate::mem::{self, MaybeUninit};
5+
use crate::ops::{ControlFlow, NeverShortCircuit, Try};
46

57
/// An iterator over `N` elements of the iterator at a time.
68
///
@@ -82,7 +84,13 @@ where
8284
}
8385
}
8486

85-
impl_fold_via_try_fold! { fold -> try_fold }
87+
fn fold<B, F>(self, init: B, f: F) -> B
88+
where
89+
Self: Sized,
90+
F: FnMut(B, Self::Item) -> B,
91+
{
92+
<Self as SpecFold>::fold(self, init, f)
93+
}
8694
}
8795

8896
#[unstable(feature = "iter_array_chunks", reason = "recently added", issue = "100450")]
@@ -168,3 +176,64 @@ where
168176
self.iter.len() < N
169177
}
170178
}
179+
180+
trait SpecFold: Iterator {
181+
fn fold<B, F>(self, init: B, f: F) -> B
182+
where
183+
Self: Sized,
184+
F: FnMut(B, Self::Item) -> B;
185+
}
186+
187+
impl<I, const N: usize> SpecFold for ArrayChunks<I, N>
188+
where
189+
I: Iterator,
190+
{
191+
#[inline]
192+
default fn fold<B, F>(mut self, init: B, mut f: F) -> B
193+
where
194+
Self: Sized,
195+
F: FnMut(B, Self::Item) -> B,
196+
{
197+
let fold = ConstFnMutClosure::new(&mut f, NeverShortCircuit::wrap_mut_2_imp);
198+
self.try_fold(init, fold).0
199+
}
200+
}
201+
202+
impl<I, const N: usize> SpecFold for ArrayChunks<I, N>
203+
where
204+
I: Iterator + TrustedRandomAccessNoCoerce,
205+
{
206+
#[inline]
207+
fn fold<B, F>(mut self, init: B, mut f: F) -> B
208+
where
209+
Self: Sized,
210+
F: FnMut(B, Self::Item) -> B,
211+
{
212+
let mut accum = init;
213+
let inner_len = self.iter.size();
214+
let mut i = 0;
215+
// Use a while loop because (0..len).step_by(N) doesn't optimize well.
216+
while inner_len - i >= N {
217+
let mut chunk = MaybeUninit::uninit_array();
218+
let mut guard = array::Guard { array_mut: &mut chunk, initialized: 0 };
219+
while guard.initialized < N {
220+
// SAFETY: The method consumes the iterator and the loop condition ensures that
221+
// all accesses are in bounds and only happen once.
222+
unsafe {
223+
let idx = i + guard.initialized;
224+
guard.push_unchecked(self.iter.__iterator_get_unchecked(idx));
225+
}
226+
}
227+
mem::forget(guard);
228+
// SAFETY: The loop above initialized all elements
229+
let chunk = unsafe { MaybeUninit::array_assume_init(chunk) };
230+
accum = f(accum, chunk);
231+
i += N;
232+
}
233+
234+
// unlike try_fold this method does not need to take care of the remainder
235+
// since `self` will be dropped
236+
237+
accum
238+
}
239+
}

library/core/tests/iter/adapters/array_chunks.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ fn test_iterator_array_chunks_fold() {
139139
let result =
140140
(0..10).map(|_| CountDrop::new(&count)).array_chunks::<3>().fold(0, |acc, _item| acc + 1);
141141
assert_eq!(result, 3);
142-
assert_eq!(count.get(), 10);
142+
// fold impls may or may not process the remainder
143+
assert!(count.get() <= 10 && count.get() >= 9);
143144
}
144145

145146
#[test]

0 commit comments

Comments
 (0)