@@ -443,7 +443,7 @@ impl<T> Bucket<T> {
 // | (to the end of T5)
 // | | `base: NonNull<T>` must point here
 // v | (to the end of T0 or to the start of C0)
- // /‾‾‾ \ v v
+ // /??? \ v v
 // [Padding], Tlast, ..., |T10|, ..., T5|, T4, T3, T2, T1, T0, |C0, C1, C2, C3, C4, C5, ..., C10, ..., Clast
 // \__________ __________/
 //            \/
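To make the layout sketched above concrete: elements grow downwards from `base` while control bytes grow upwards from it. A minimal illustrative sketch of that pointer arithmetic (the helper names and the standalone `base` parameter are assumptions for this example, not hashbrown's API):

// Illustration of the diagram: T0 ends exactly at `base`, T1 ends where T0 starts,
// and so on, while control byte C0 starts exactly at `base`.
unsafe fn element_ptr<T>(base: *mut T, index: usize) -> *mut T {
    base.sub(index + 1)
}

unsafe fn ctrl_ptr<T>(base: *mut T, index: usize) -> *mut u8 {
    base.cast::<u8>().add(index)
}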
@@ -1083,7 +1083,7 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
    /// without reallocation.
    #[cfg_attr(feature = "inline-more", inline)]
    pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) {
-       if additional > self.table.growth_left {
+       if unlikely(additional > self.table.growth_left) {
            // Avoid `Result::unwrap_or_else` because it bloats LLVM IR.
            if self
                .reserve_rehash(additional, hasher, Fallibility::Infallible)
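The change here only wraps the slow path in `unlikely` so the compiler treats the rehashing branch as cold. As a rough sketch of what such a hint can look like on stable Rust (an illustration of the general idiom, not necessarily hashbrown's own definition; on nightly a compiler intrinsic can serve the same purpose):

// Calling a #[cold] function on the rare path nudges LLVM to lay that branch out
// as cold; the wrapper otherwise just returns its argument unchanged.
#[cold]
#[inline(never)]
fn cold_path() {}

#[inline]
fn unlikely_hint(b: bool) -> bool {
    if b {
        cold_path();
    }
    b
}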
@@ -1252,6 +1252,22 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
        }
    }

+    /// Searches for an element in the table,
+    /// or a potential slot where that element could be inserted.
+    #[inline]
+    pub fn find_potential(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> (usize, bool) {
+        self.table.find_potential_inner(hash, &mut |index| unsafe {
+            eq(self.bucket(index).as_ref())
+        })
+    }
+
+    /// Marks an element in the table as inserted.
+    #[inline]
+    pub unsafe fn mark_inserted(&mut self, index: usize, hash: u64) {
+        let old_ctrl = *self.table.ctrl(index);
+        self.table.record_item_insert_at(index, old_ctrl, hash);
+    }
+
    /// Searches for an element in the table.
    #[inline]
    pub fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option<Bucket<T>> {
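Taken together, the two new methods support a find-or-insert pattern without hashing twice. A sketch of the intended usage (assuming hashbrown's `raw` feature, a table that already has room for one more element, and a caller-supplied `hash`; `find_or_insert` itself is a hypothetical helper, not part of this change):

use hashbrown::raw::RawTable;

fn find_or_insert<T: PartialEq>(table: &mut RawTable<T>, hash: u64, value: T) -> usize {
    // `found == true` means `index` refers to an existing equal element;
    // `found == false` means `index` is an empty or deleted slot where it can go.
    let (index, found) = table.find_potential(hash, |x| *x == value);
    if !found {
        unsafe {
            // Write the value into the free slot, then update the control byte and
            // the item count via `mark_inserted`.
            table.bucket(index).write(value);
            table.mark_inserted(index, hash);
        }
    }
    index
}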
@@ -1585,6 +1601,106 @@ impl<A: Allocator + Clone> RawTableInner<A> {
        }
    }

+    /// Fixes up an insertion slot due to false positives for tables smaller than the group width.
+    /// This must only be used on insertion slots found by `find_insert_slot_in_group`.
+    #[inline]
+    unsafe fn fix_insert_slot(&self, index: usize) -> usize {
+        // In tables smaller than the group width
+        // (self.buckets() < Group::WIDTH), trailing control
+        // bytes outside the range of the table are filled with
+        // EMPTY entries. These will unfortunately trigger a
+        // match, but once masked may point to a full bucket that
+        // is already occupied. We detect this situation here and
+        // perform a second scan starting at the beginning of the
+        // table. This second scan is guaranteed to find an empty
+        // slot (due to the load factor) before hitting the trailing
+        // control bytes (containing EMPTY).
+        if unlikely(self.is_bucket_full(index)) {
+            debug_assert!(self.bucket_mask < Group::WIDTH);
+            // SAFETY:
+            //
+            // * We are in range and `ptr = self.ctrl(0)` is valid for reads
+            //   and properly aligned, because the table is already allocated
+            //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
+            //
+            // * For tables larger than the group width (self.buckets() >= Group::WIDTH),
+            //   we will never end up in the given branch, since
+            //   `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group` cannot
+            //   return a full bucket index. For tables smaller than the group width, calling the
+            //   `lowest_set_bit_nonzero` function (when the `nightly` feature is enabled) is also
+            //   safe, as the trailing control bytes outside the range of the table are filled
+            //   with EMPTY bytes, so this second scan either finds an empty slot (due to the
+            //   load factor) or hits the trailing control bytes (containing EMPTY). See
+            //   `intrinsics::cttz_nonzero` for more information.
+            Group::load_aligned(self.ctrl(0))
+                .match_empty_or_deleted()
+                .lowest_set_bit_nonzero()
+        } else {
+            index
+        }
+    }
+
+    /// Finds the position to insert something in a group.
+    /// This may have false positives and must be fixed up with `fix_insert_slot` before it's used.
+    #[inline]
+    fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
+        let bit = group.match_empty_or_deleted().lowest_set_bit();
+
+        if likely(bit.is_some()) {
+            Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask)
+        } else {
+            None
+        }
+    }
+
+    /// Searches for an element in the table, or a potential slot where that element could be
+    /// inserted.
+    ///
+    /// This uses dynamic dispatch to reduce the amount of code generated, but the dispatch is
+    /// eliminated by LLVM optimizations.
+    #[inline]
+    pub fn find_potential_inner(
+        &self,
+        hash: u64,
+        eq: &mut dyn FnMut(usize) -> bool,
+    ) -> (usize, bool) {
+        let mut insert_slot = None;
+
+        let h2_hash = h2(hash);
+        let mut probe_seq = self.probe_seq(hash);
+
+        loop {
+            let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };
+
+            for bit in group.match_byte(h2_hash) {
+                let index = (probe_seq.pos + bit) & self.bucket_mask;
+
+                if likely(eq(index)) {
+                    return (index, true);
+                }
+            }
+
+            // We didn't find the element we were looking for in this group, so try to get an
+            // insertion slot from the group if we don't have one yet.
+            if likely(insert_slot.is_none()) {
+                insert_slot = self.find_insert_slot_in_group(&group, &probe_seq);
+            }
+
+            // Only stop the search if the group contains at least one empty element.
+            // Otherwise, the element that we are looking for might be in a following group.
+            if likely(group.match_empty().any_bit_set()) {
+                // We must have found an insert slot by now, since the current group contains at
+                // least one empty element. For tables smaller than the group width, there will
+                // still be an empty element in the current (and only) group due to the load factor.
+                unsafe {
+                    return (self.fix_insert_slot(insert_slot.unwrap_unchecked()), false);
+                }
+            }
+
+            probe_seq.move_next(self.bucket_mask);
+        }
+    }
+
    /// Searches for an empty or deleted bucket which is suitable for inserting
    /// a new element and sets the hash for that slot.
    ///
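The `find_potential_inner` doc comment above mentions a dynamic-dispatch trick: taking the comparison closure as `&mut dyn FnMut(usize) -> bool` means the probing loop is compiled only once rather than once per closure type, and LLVM can usually devirtualize the call again after inlining. A minimal illustration of that pattern (the names here are made up for the example):

// The inner function is not generic, so it is compiled once; only a fat pointer
// crosses the call boundary.
fn probe_inner(eq: &mut dyn FnMut(usize) -> bool, candidates: &[usize]) -> Option<usize> {
    candidates.iter().copied().find(|&i| eq(i))
}

// Each generic caller stays thin and simply erases its closure's type.
fn probe<F: FnMut(usize) -> bool>(mut eq: F, candidates: &[usize]) -> Option<usize> {
    probe_inner(&mut eq, candidates)
}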
@@ -1637,48 +1753,10 @@ impl<A: Allocator + Clone> RawTableInner<A> {
            // bytes, which is safe (see RawTableInner::new_in).
            unsafe {
                let group = Group::load(self.ctrl(probe_seq.pos));
-               if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() {
-                   // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number
-                   // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
-                   let result = (probe_seq.pos + bit) & self.bucket_mask;
-
-                   // In tables smaller than the group width
-                   // (self.buckets() < Group::WIDTH), trailing control
-                   // bytes outside the range of the table are filled with
-                   // EMPTY entries. These will unfortunately trigger a
-                   // match, but once masked may point to a full bucket that
-                   // is already occupied. We detect this situation here and
-                   // perform a second scan starting at the beginning of the
-                   // table. This second scan is guaranteed to find an empty
-                   // slot (due to the load factor) before hitting the trailing
-                   // control bytes (containing EMPTY).
-                   //
-                   // SAFETY: The `result` is guaranteed to be in range `0..self.bucket_mask`
-                   // due to masking with `self.bucket_mask`
-                   if unlikely(self.is_bucket_full(result)) {
-                       debug_assert!(self.bucket_mask < Group::WIDTH);
-                       debug_assert_ne!(probe_seq.pos, 0);
-                       // SAFETY:
-                       //
-                       // * We are in range and `ptr = self.ctrl(0)` are valid for reads
-                       //   and properly aligned, because the table is already allocated
-                       //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
-                       //
-                       // * For tables larger than the group width (self.buckets() >= Group::WIDTH),
-                       //   we will never end up in the given branch, since
-                       //   `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
-                       //   full bucket index. For tables smaller than the group width, calling the
-                       //   `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
-                       //   safe, as the trailing control bytes outside the range of the table are filled
-                       //   with EMPTY bytes, so this second scan either finds an empty slot (due to the
-                       //   load factor) or hits the trailing control bytes (containing EMPTY). See
-                       //   `intrinsics::cttz_nonzero` for more information.
-                       return Group::load_aligned(self.ctrl(0))
-                           .match_empty_or_deleted()
-                           .lowest_set_bit_nonzero();
-                   }
+               let index = self.find_insert_slot_in_group(&group, &probe_seq);

-                   return result;
+               if likely(index.is_some()) {
+                   return self.fix_insert_slot(index.unwrap_unchecked());
                }
            }
            probe_seq.move_next(self.bucket_mask);
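The comment removed above also records a small but important identity: `(probe_seq.pos + bit) & self.bucket_mask` equals `(probe_seq.pos + bit) % self.buckets()` because the number of buckets is a power of two and `bucket_mask == buckets - 1`. A tiny self-contained check of that identity (illustrative only, not part of the change):

fn wrap_index(pos_plus_bit: usize, buckets: usize) -> usize {
    debug_assert!(buckets.is_power_of_two());
    let bucket_mask = buckets - 1;
    // For a power-of-two bucket count, masking and taking the remainder agree.
    debug_assert_eq!(pos_plus_bit & bucket_mask, pos_plus_bit % buckets);
    pos_plus_bit & bucket_mask
}

#[test]
fn mask_matches_modulo() {
    for buckets in [1usize, 2, 4, 8, 16] {
        for x in 0..64 {
            assert_eq!(wrap_index(x, buckets), x % buckets);
        }
    }
}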