
Commit ba6ca78

Optimize insertion to only use a single lookup
1 parent 7d01f51 commit ba6ca78

File tree

2 files changed (+118, -27 lines)


src/map.rs

Lines changed: 15 additions & 6 deletions
@@ -1787,12 +1787,21 @@ where
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn insert(&mut self, k: K, v: V) -> Option<V> {
         let hash = make_insert_hash::<K, S>(&self.hash_builder, &k);
-        if let Some((_, item)) = self.table.get_mut(hash, equivalent_key(&k)) {
-            Some(mem::replace(item, v))
-        } else {
-            self.table
-                .insert(hash, (k, v), make_hasher::<_, V, S>(&self.hash_builder));
-            None
+        self.table
+            .reserve(1, make_hasher::<_, V, S>(&self.hash_builder));
+
+        unsafe {
+            let (index, found) = self.table.find_potential(hash, equivalent_key(&k));
+
+            let bucket = self.table.bucket(index);
+
+            if found {
+                Some(mem::replace(&mut bucket.as_mut().1, v))
+            } else {
+                self.table.mark_inserted(index, hash);
+                bucket.write((k, v));
+                None
+            }
         }
     }
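The public contract of `insert` is untouched by this rewrite; the saving is that the table is probed once instead of twice (previously a failed `get_mut` lookup was followed by a second probe inside `RawTable::insert`). For reference, a quick illustration of the unchanged semantics using the ordinary `HashMap` API:

use hashbrown::HashMap;

fn main() {
    let mut map = HashMap::new();

    // New key: the single probe ends at a free slot and the pair is written.
    assert_eq!(map.insert("a", 1), None);

    // Existing key: the same probe finds the occupied bucket and only the value is replaced.
    assert_eq!(map.insert("a", 2), Some(1));
    assert_eq!(map["a"], 2);
}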

src/raw/mod.rs

Lines changed: 103 additions & 21 deletions
@@ -663,7 +663,7 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
     /// without reallocation.
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) {
-        if additional > self.table.growth_left {
+        if unlikely(additional > self.table.growth_left) {
             // Avoid `Result::unwrap_or_else` because it bloats LLVM IR.
             if self
                 .reserve_rehash(additional, hasher, Fallibility::Infallible)
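The only change here is wrapping the growth check in `unlikely(...)`, telling the optimizer that the reallocation path is cold so the common "capacity already reserved" case stays on the fast branch. As a point of reference, a common way to express such a hint on stable Rust (an illustrative sketch, not necessarily hashbrown's exact definition) is to route the unexpected branch through a `#[cold]` function:

// Illustrative sketch of a branch-prediction hint on stable Rust.
#[cold]
#[inline(never)]
fn cold() {}

#[inline(always)]
fn unlikely(b: bool) -> bool {
    // Calling a #[cold] function on the `true` path nudges codegen to
    // treat that branch as the unlikely one.
    if b {
        cold();
    }
    b
}

fn main() {
    let (additional, growth_left) = (1usize, 3usize);
    if unlikely(additional > growth_left) {
        println!("slow path: grow the table");
    } else {
        println!("fast path: capacity already reserved");
    }
}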
@@ -832,6 +832,22 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
         }
     }
 
+    /// Searches for an element in the table,
+    /// or a potential slot where that element could be inserted.
+    #[inline]
+    pub fn find_potential(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> (usize, bool) {
+        self.table.find_potential_inner(hash, &mut |index| unsafe {
+            eq(self.bucket(index).as_ref())
+        })
+    }
+
+    /// Marks an element in the table as inserted.
+    #[inline]
+    pub unsafe fn mark_inserted(&mut self, index: usize, hash: u64) {
+        let old_ctrl = *self.table.ctrl(index);
+        self.table.record_item_insert_at(index, old_ctrl, hash);
+    }
+
     /// Searches for an element in the table.
     #[inline]
     pub fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option<Bucket<T>> {
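Taken together, `find_potential` and `mark_inserted` split the old search-then-insert sequence into one probe plus a cheap commit step. A minimal sketch of the intended calling pattern, mirroring the `map.rs` change above (the helper name `upsert` is hypothetical, and reaching `RawTable` from outside the crate needs hashbrown's optional raw-table API):

use hashbrown::raw::RawTable;

/// Hypothetical helper: insert or replace using the new two-step API.
/// `hash` must come from the same hasher that is passed to `reserve`.
unsafe fn upsert<T>(
    table: &mut RawTable<T>,
    hash: u64,
    value: T,
    eq: impl FnMut(&T) -> bool,
    hasher: impl Fn(&T) -> u64,
) {
    // Guarantee a free slot up front, exactly as HashMap::insert now does.
    table.reserve(1, hasher);

    // One probe sequence yields either the matching element or a usable slot.
    let (index, found) = table.find_potential(hash, eq);
    let bucket = table.bucket(index);

    if found {
        // Element already present: overwrite the payload in place.
        *bucket.as_mut() = value;
    } else {
        // Free slot: record the insertion in the control bytes, then write the payload.
        table.mark_inserted(index, hash);
        bucket.write(value);
    }
}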
@@ -1138,6 +1154,89 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         }
     }
 
+    /// Finds the position to insert something in a group.
+    #[inline]
+    fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
+        let bit = group.match_empty_or_deleted().lowest_set_bit();
+
+        if likely(bit.is_some()) {
+            let mut index = (probe_seq.pos + bit.unwrap()) & self.bucket_mask;
+
+            // In tables smaller than the group width, trailing control
+            // bytes outside the range of the table are filled with
+            // EMPTY entries. These will unfortunately trigger a
+            // match, but once masked may point to a full bucket that
+            // is already occupied. We detect this situation here and
+            // perform a second scan starting at the beginning of the
+            // table. This second scan is guaranteed to find an empty
+            // slot (due to the load factor) before hitting the trailing
+            // control bytes (containing EMPTY).
+            unsafe {
+                if unlikely(self.is_bucket_full(index)) {
+                    debug_assert!(self.bucket_mask < Group::WIDTH);
+                    debug_assert_ne!(probe_seq.pos, 0);
+                    index = Group::load_aligned(self.ctrl(0))
+                        .match_empty_or_deleted()
+                        .lowest_set_bit_nonzero();
+                }
+            }
+
+            Some(index)
+        } else {
+            None
+        }
+    }
+
+    /// Searches for an element in the table, or a potential slot where that element could be
+    /// inserted.
+    ///
+    /// This uses dynamic dispatch to reduce the amount of code generated, but that is
+    /// eliminated by LLVM optimizations.
+    #[inline]
+    pub fn find_potential_inner(
+        &self,
+        hash: u64,
+        eq: &mut dyn FnMut(usize) -> bool,
+    ) -> (usize, bool) {
+        let mut insert_slot = None;
+
+        let h2_hash = h2(hash);
+        let mut probe_seq = self.probe_seq(hash);
+
+        loop {
+            let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };
+
+            for bit in group.match_byte(h2_hash) {
+                let index = (probe_seq.pos + bit) & self.bucket_mask;
+
+                if likely(eq(index)) {
+                    return (index, true);
+                }
+            }
+
+            // We didn't find the element we were looking for in the group, so try to get an
+            // insertion slot from the group if we don't have one yet.
+            if likely(insert_slot.is_none()) {
+                insert_slot = self.find_insert_slot_in_group(&group, &probe_seq);
+            }
+
+            // Only stop the search if the group contains at least one empty element.
+            // Otherwise, the element that we are looking for might be in a following group.
+            if likely(group.match_empty().any_bit_set()) {
+                // We must have found an insert slot by now, since the current group contains at
+                // least one. For tables smaller than the group width, there will still be an
+                // empty element in the current (and only) group due to the load factor.
+                debug_assert!(insert_slot.is_some());
+                match insert_slot {
+                    Some(insert_slot) => return (insert_slot, false),
+                    None => unsafe { hint::unreachable_unchecked() },
+                }
+            }
+
+            probe_seq.move_next(self.bucket_mask);
+        }
+    }
+
     /// Searches for an empty or deleted bucket which is suitable for inserting
     /// a new element and sets the hash for that slot.
     ///
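To make the control flow of `find_potential_inner` easier to follow, here is a deliberately simplified scalar model of the same idea, with hypothetical names (`ToyTable`, `EMPTY`, `DELETED`) and plain linear probing in place of hashbrown's SIMD groups and triangular probe sequence: return early on an h2 match, remember the first empty-or-deleted slot passed, and only commit to it once an EMPTY control byte proves the key cannot appear later on the probe path.

// Simplified scalar model; not hashbrown code.
const EMPTY: u8 = 0xFF;
const DELETED: u8 = 0x80;

struct ToyTable {
    ctrl: Vec<u8>,          // one control byte per bucket: EMPTY, DELETED, or an h2 value (< 0x80)
    keys: Vec<Option<u64>>, // payload; Some(key) only where ctrl holds an h2 value
}

impl ToyTable {
    /// Returns (index, found): the matching bucket if the key is present,
    /// otherwise the first reusable (empty or deleted) slot on the probe path.
    fn find_potential(&self, key: u64, h2: u8) -> (usize, bool) {
        let mask = self.ctrl.len() - 1; // capacity is a power of two
        let mut pos = (key as usize) & mask;
        let mut insert_slot = None;

        loop {
            let ctrl = self.ctrl[pos];

            // Possible match: confirm with a full key comparison.
            if ctrl == h2 && self.keys[pos] == Some(key) {
                return (pos, true);
            }

            // Remember the first slot the key could be inserted into.
            if insert_slot.is_none() && (ctrl == EMPTY || ctrl == DELETED) {
                insert_slot = Some(pos);
            }

            // An EMPTY byte terminates the probe sequence: the key cannot be
            // stored any further along, so report the remembered slot.
            if ctrl == EMPTY {
                return (insert_slot.unwrap(), false);
            }

            // DELETED is a tombstone: keep probing, the key may still be ahead.
            pos = (pos + 1) & mask;
        }
    }
}

fn main() {
    // Capacity 8, one occupied bucket (h2 = 0x11 for key 3), one tombstone at index 4.
    let mut table = ToyTable { ctrl: vec![EMPTY; 8], keys: vec![None; 8] };
    table.ctrl[3] = 0x11;
    table.keys[3] = Some(3);
    table.ctrl[4] = DELETED;

    assert_eq!(table.find_potential(3, 0x11), (3, true)); // existing key found
    assert_eq!(table.find_potential(11, 0x22), (4, false)); // 11 & 7 == 3: skips the full bucket,
                                                            // reuses the tombstone at index 4
}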
@@ -1160,27 +1259,10 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         loop {
             unsafe {
                 let group = Group::load(self.ctrl(probe_seq.pos));
-                if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() {
-                    let result = (probe_seq.pos + bit) & self.bucket_mask;
-
-                    // In tables smaller than the group width, trailing control
-                    // bytes outside the range of the table are filled with
-                    // EMPTY entries. These will unfortunately trigger a
-                    // match, but once masked may point to a full bucket that
-                    // is already occupied. We detect this situation here and
-                    // perform a second scan starting at the beginning of the
-                    // table. This second scan is guaranteed to find an empty
-                    // slot (due to the load factor) before hitting the trailing
-                    // control bytes (containing EMPTY).
-                    if unlikely(self.is_bucket_full(result)) {
-                        debug_assert!(self.bucket_mask < Group::WIDTH);
-                        debug_assert_ne!(probe_seq.pos, 0);
-                        return Group::load_aligned(self.ctrl(0))
-                            .match_empty_or_deleted()
-                            .lowest_set_bit_nonzero();
-                    }
+                let index = self.find_insert_slot_in_group(&group, &probe_seq);
 
-                    return result;
+                if likely(index.is_some()) {
+                    return index.unwrap();
                 }
             }
             probe_seq.move_next(self.bucket_mask);
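Since the gain is one probe sequence per insertion instead of two (most visible when inserting keys that are not already present), a rough way to observe it is to time a bulk-insert loop on builds before and after this commit. The snippet below is only a sketch of such a measurement; it reports whatever your machine produces and makes no claims about specific speedups:

use hashbrown::HashMap;
use std::time::Instant;

fn main() {
    const N: u64 = 10_000_000;

    let mut map: HashMap<u64, u64> = HashMap::with_capacity(N as usize);
    let start = Instant::now();
    for i in 0..N {
        // Every key is new, so the old code paid for a failed lookup
        // followed by a second probe inside RawTable::insert.
        map.insert(i, i);
    }
    println!("{} inserts in {:?}", N, start.elapsed());
}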
