Skip to content

Commit 02d6e00

Browse files
author
Stjepan Glavina
committed
Use partial insertion sort
1 parent c4454a5 commit 02d6e00

File tree

1 file changed

+119
-54
lines changed

1 file changed

+119
-54
lines changed

src/libcore/slice/sort.rs

+119-54
Original file line numberDiff line numberDiff line change
@@ -38,64 +38,125 @@ impl<T> Drop for CopyOnDrop<T> {
3838
}
3939
}
4040

41-
/// Sorts a slice using insertion sort, which is `O(n^2)` worst-case.
42-
fn insertion_sort<T, F>(v: &mut [T], is_less: &mut F)
41+
/// Shifts the first element of `v` to the right until it encounters a greater or equal element, as ordered by `is_less`.
42+
fn shift_head<T, F>(v: &mut [T], is_less: &mut F)
4343
where F: FnMut(&T, &T) -> bool
4444
{
4545
let len = v.len();
46+
unsafe {
47+
// If the first two elements are out-of-order...
48+
if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
49+
// Read the first element into a stack-allocated variable. If a following comparison
50+
// operation panics, `hole` will get dropped and automatically write the element back
51+
// into the slice.
52+
let mut tmp = NoDrop { value: ptr::read(v.get_unchecked(0)) };
53+
let mut hole = CopyOnDrop {
54+
src: &mut tmp.value,
55+
dest: v.get_unchecked_mut(1),
56+
};
57+
ptr::copy_nonoverlapping(v.get_unchecked(1), v.get_unchecked_mut(0), 1);
4658

47-
for i in 1..len {
48-
unsafe {
49-
if is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
50-
// There are three ways to implement insertion here:
51-
//
52-
// 1. Swap adjacent elements until the first one gets to its final destination.
53-
// However, this way we copy data around more than is necessary. If elements are
54-
// big structures (costly to copy), this method will be slow.
55-
//
56-
// 2. Iterate until the right place for the first element is found. Then shift the
57-
// elements succeeding it to make room for it and finally place it into the
58-
// remaining hole. This is a good method.
59-
//
60-
// 3. Copy the first element into a temporary variable. Iterate until the right
61-
// place for it is found. As we go along, copy every traversed element into the
62-
// slot preceding it. Finally, copy data from the temporary variable into the
63-
// remaining hole. This method is very good. Benchmarks demonstrated slightly
64-
// better performance than with the 2nd method.
65-
//
66-
// All methods were benchmarked, and the 3rd showed best results. So we chose that
67-
// one.
68-
let mut tmp = NoDrop { value: ptr::read(v.get_unchecked(i)) };
69-
70-
// Intermediate state of the insertion process is always tracked by `hole`, which
71-
// serves two purposes:
72-
// 1. Protects integrity of `v` from panics in `is_less`.
73-
// 2. Fills the remaining hole in `v` in the end.
74-
//
75-
// Panic safety:
76-
//
77-
// If `is_less` panics at any point during the process, `hole` will get dropped and
78-
// fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object
79-
// it initially held exactly once.
80-
let mut hole = CopyOnDrop {
81-
src: &mut tmp.value,
82-
dest: v.get_unchecked_mut(i - 1),
83-
};
84-
ptr::copy_nonoverlapping(v.get_unchecked(i - 1), v.get_unchecked_mut(i), 1);
85-
86-
for h in (0..i-1).rev() {
87-
if !is_less(&tmp.value, v.get_unchecked(h)) {
88-
break;
89-
}
90-
ptr::copy_nonoverlapping(v.get_unchecked(h), v.get_unchecked_mut(h + 1), 1);
91-
hole.dest = v.get_unchecked_mut(h);
59+
for i in 2..len { // `i < len` throughout, so the unchecked accesses below stay in bounds.
60+
if !is_less(v.get_unchecked(i), &tmp.value) {
61+
break;
9262
}
93-
// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
63+
64+
// Move `i`-th element one place to the left, thus shifting the hole to the right.
65+
ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i - 1), 1);
66+
hole.dest = v.get_unchecked_mut(i);
9467
}
68+
// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
9569
}
9670
}
9771
}
9872

73+
/// Shifts the last element of `v` to the left until it encounters a smaller or equal element, as ordered by `is_less`.
74+
fn shift_tail<T, F>(v: &mut [T], is_less: &mut F)
75+
where F: FnMut(&T, &T) -> bool
76+
{
77+
let len = v.len();
78+
unsafe {
79+
// If the last two elements are out-of-order (slices shorter than two elements are left untouched)...
80+
if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
81+
// Read the last element into a stack-allocated variable. If a following comparison
82+
// operation panics, `hole` will get dropped and automatically write the element back
83+
// into the slice.
84+
let mut tmp = NoDrop { value: ptr::read(v.get_unchecked(len - 1)) };
85+
let mut hole = CopyOnDrop {
86+
src: &mut tmp.value,
87+
dest: v.get_unchecked_mut(len - 2),
88+
};
89+
ptr::copy_nonoverlapping(v.get_unchecked(len - 2), v.get_unchecked_mut(len - 1), 1);
90+
91+
for i in (0..len-2).rev() {
92+
if !is_less(&tmp.value, v.get_unchecked(i)) {
93+
break;
94+
}
95+
96+
// Move the `i`-th element one place to the right, thus shifting the hole to the left.
97+
ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i + 1), 1);
98+
hole.dest = v.get_unchecked_mut(i);
99+
}
100+
// `hole` gets dropped here and thus copies `tmp` into the remaining hole in `v`.
101+
}
102+
}
103+
}
104+
105+
/// Partially sorts a slice by shifting several out-of-order elements around.
106+
///
107+
/// Returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case.
108+
#[cold]
109+
fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &mut F) -> bool
110+
where F: FnMut(&T, &T) -> bool
111+
{
112+
// Maximum number of adjacent out-of-order pairs that will get shifted.
113+
const MAX_STEPS: usize = 5;
114+
// If the slice is shorter than this, don't shift any elements.
115+
const SHORTEST_SHIFTING: usize = 50;
116+
117+
let len = v.len();
118+
let mut i = 1;
119+
120+
for _ in 0..MAX_STEPS {
121+
unsafe {
122+
// Find the next pair of adjacent out-of-order elements; `i` carries over between steps, so the scan resumes where it stopped.
123+
while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
124+
i += 1;
125+
}
126+
}
127+
128+
// Are we done? Reaching the end means no out-of-order pair remains.
129+
if i == len {
130+
return true;
131+
}
132+
133+
// Don't shift elements on short slices; doing so has a performance cost.
134+
if len < SHORTEST_SHIFTING {
135+
return false;
136+
}
137+
138+
// Swap the found pair of elements. This puts them in the correct order relative to each other.
139+
v.swap(i - 1, i);
140+
141+
// Shift the smaller element (now at `i - 1`) to the left within `v[..i]`.
142+
shift_tail(&mut v[..i], is_less);
143+
// Shift the greater element (now at `i`) to the right within `v[i..]`.
144+
shift_head(&mut v[i..], is_less);
145+
}
146+
147+
// Didn't manage to sort the slice in the limited number of steps.
148+
false
149+
}
150+
151+
/// Sorts a slice using insertion sort, which is `O(n^2)` worst-case, by shifting the last element of each successively longer prefix of `v` into place.
152+
fn insertion_sort<T, F>(v: &mut [T], is_less: &mut F)
153+
where F: FnMut(&T, &T) -> bool
154+
{
155+
for i in 1..v.len() { // `i + 1 <= v.len()`, so the prefix bound below cannot overflow, even when `v.len() == usize::MAX`.
156+
shift_tail(&mut v[..i + 1], is_less);
157+
}
158+
}
159+
99160
/// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case.
100161
#[cold]
101162
fn heapsort<T, F>(v: &mut [T], is_less: &mut F)
@@ -180,6 +241,9 @@ fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
180241
let mut end_r = ptr::null_mut();
181242
let mut offsets_r: [u8; BLOCK] = unsafe { mem::uninitialized() };
182243

244+
// FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
245+
// than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.
246+
183247
// Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
184248
fn width<T>(l: *mut T, r: *mut T) -> usize {
185249
assert!(mem::size_of::<T>() > 0);
@@ -470,10 +534,10 @@ fn break_patterns<T>(v: &mut [T]) {
470534
fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
471535
where F: FnMut(&T, &T) -> bool
472536
{
473-
// Minimal length to choose the median-of-medians method.
537+
// Minimum length to choose the median-of-medians method.
474538
// Shorter slices use the simple median-of-three method.
475-
const SHORTEST_MEDIAN_OF_MEDIANS: usize = 80;
476-
// Maximal number of swaps that can be performed in this function.
539+
const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50;
540+
// Maximum number of swaps that can be performed in this function.
477541
const MAX_SWAPS: usize = 4 * 3;
478542

479543
let len = v.len();
@@ -522,7 +586,7 @@ fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
522586
if swaps < MAX_SWAPS {
523587
(b, swaps == 0)
524588
} else {
525-
// The maximal number of swaps was performed. Chances are the slice is descending or mostly
589+
// The maximum number of swaps was performed. Chances are the slice is descending or mostly
526590
// descending, so reversing will probably help sort it faster.
527591
v.reverse();
528592
(len - 1 - b, true)
@@ -575,8 +639,9 @@ fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &mut F, mut pred: Option<&'a T
575639
// If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
576640
// selection predicts the slice is likely already sorted...
577641
if was_balanced && was_partitioned && likely_sorted {
578-
// Check whether the slice really is sorted. If so, we're done.
579-
if v.windows(2).all(|w| !is_less(&w[1], &w[0])) {
642+
// Try identifying several out-of-order elements and shifting them to correct
643+
// positions. If the slice ends up being completely sorted, we're done.
644+
if partial_insertion_sort(v, is_less) {
580645
return;
581646
}
582647
}

0 commit comments

Comments
 (0)