diff --git a/Cargo.lock b/Cargo.lock index 99cb71cd0ac87..d398c1d5637b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3855,6 +3855,7 @@ dependencies = [ name = "rustc_index" version = "0.0.0" dependencies = [ + "itertools", "rustc_index_macros", "rustc_macros", "rustc_serialize", diff --git a/compiler/rustc_abi/src/layout/coroutine.rs b/compiler/rustc_abi/src/layout/coroutine.rs index 27e704d538c83..73564705686f0 100644 --- a/compiler/rustc_abi/src/layout/coroutine.rs +++ b/compiler/rustc_abi/src/layout/coroutine.rs @@ -120,7 +120,7 @@ fn coroutine_saved_local_eligibility>(t: &T) -> Hash128 { h.finish() } -// Check that bit set hash includes the domain size. -#[test] -fn test_hash_bit_set() { - use rustc_index::bit_set::DenseBitSet; - let a: DenseBitSet = DenseBitSet::new_empty(1); - let b: DenseBitSet = DenseBitSet::new_empty(2); - assert_ne!(a, b); - assert_ne!(hash(&a), hash(&b)); -} - // Check that bit matrix hash includes the matrix dimensions. #[test] fn test_hash_bit_matrix() { diff --git a/compiler/rustc_index/Cargo.toml b/compiler/rustc_index/Cargo.toml index 3d83a3c98daf8..9aa24e668b6b7 100644 --- a/compiler/rustc_index/Cargo.toml +++ b/compiler/rustc_index/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start +itertools = "0.12" rustc_index_macros = { path = "../rustc_index_macros" } rustc_macros = { path = "../rustc_macros", optional = true } rustc_serialize = { path = "../rustc_serialize", optional = true } diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs index 07934389158e5..e5588e90f742e 100644 --- a/compiler/rustc_index/src/bit_set.rs +++ b/compiler/rustc_index/src/bit_set.rs @@ -1,11 +1,13 @@ +mod dense_bit_set; use std::marker::PhantomData; #[cfg(not(feature = "nightly"))] use std::mem; -use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Bound, Not, Range, RangeBounds, Shl}; +use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Not, Range, Shl}; use std::rc::Rc; -use std::{fmt, iter, slice}; +use std::{fmt, iter}; use Chunk::*; +pub use dense_bit_set::{BitIter, DenseBitSet, GrowableBitSet}; #[cfg(feature = "nightly")] use rustc_macros::{Decodable_NoContext, Encodable_NoContext}; use smallvec::{SmallVec, smallvec}; @@ -43,29 +45,6 @@ pub trait BitRelations { fn intersect(&mut self, other: &Rhs) -> bool; } -#[inline] -fn inclusive_start_end( - range: impl RangeBounds, - domain: usize, -) -> Option<(usize, usize)> { - // Both start and end are inclusive. - let start = match range.start_bound().cloned() { - Bound::Included(start) => start.index(), - Bound::Excluded(start) => start.index() + 1, - Bound::Unbounded => 0, - }; - let end = match range.end_bound().cloned() { - Bound::Included(end) => end.index(), - Bound::Excluded(end) => end.index().checked_sub(1)?, - Bound::Unbounded => domain - 1, - }; - assert!(end < domain); - if start > end { - return None; - } - Some((start, end)) -} - macro_rules! bit_relations_inherent_impls { () => { /// Sets `self = self | other` and returns `true` if `self` changed @@ -96,345 +75,7 @@ macro_rules! bit_relations_inherent_impls { } }; } - -/// A fixed-size bitset type with a dense representation. -/// -/// Note 1: Since this bitset is dense, if your domain is big, and/or relatively -/// homogeneous (for example, with long runs of bits set or unset), then it may -/// be preferable to instead use a [MixedBitSet], or an -/// [IntervalSet](crate::interval::IntervalSet). They should be more suited to -/// sparse, or highly-compressible, domains. 
-/// -/// Note 2: Use [`GrowableBitSet`] if you need support for resizing after creation. -/// -/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also -/// just be `usize`. -/// -/// All operations that involve an element will panic if the element is equal -/// to or greater than the domain size. All operations that involve two bitsets -/// will panic if the bitsets have differing domain sizes. -/// -#[cfg_attr(feature = "nightly", derive(Decodable_NoContext, Encodable_NoContext))] -#[derive(Eq, PartialEq, Hash)] -pub struct DenseBitSet { - domain_size: usize, - words: SmallVec<[Word; 2]>, - marker: PhantomData, -} - -impl DenseBitSet { - /// Gets the domain size. - pub fn domain_size(&self) -> usize { - self.domain_size - } -} - -impl DenseBitSet { - /// Creates a new, empty bitset with a given `domain_size`. - #[inline] - pub fn new_empty(domain_size: usize) -> DenseBitSet { - let num_words = num_words(domain_size); - DenseBitSet { domain_size, words: smallvec![0; num_words], marker: PhantomData } - } - - /// Creates a new, filled bitset with a given `domain_size`. - #[inline] - pub fn new_filled(domain_size: usize) -> DenseBitSet { - let num_words = num_words(domain_size); - let mut result = - DenseBitSet { domain_size, words: smallvec![!0; num_words], marker: PhantomData }; - result.clear_excess_bits(); - result - } - - /// Clear all elements. - #[inline] - pub fn clear(&mut self) { - self.words.fill(0); - } - - /// Clear excess bits in the final word. - fn clear_excess_bits(&mut self) { - clear_excess_bits_in_final_word(self.domain_size, &mut self.words); - } - - /// Count the number of set bits in the set. - pub fn count(&self) -> usize { - self.words.iter().map(|e| e.count_ones() as usize).sum() - } - - /// Returns `true` if `self` contains `elem`. - #[inline] - pub fn contains(&self, elem: T) -> bool { - assert!(elem.index() < self.domain_size); - let (word_index, mask) = word_index_and_mask(elem); - (self.words[word_index] & mask) != 0 - } - - /// Is `self` is a (non-strict) superset of `other`? - #[inline] - pub fn superset(&self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - self.words.iter().zip(&other.words).all(|(a, b)| (a & b) == *b) - } - - /// Is the set empty? - #[inline] - pub fn is_empty(&self) -> bool { - self.words.iter().all(|a| *a == 0) - } - - /// Insert `elem`. Returns whether the set has changed. - #[inline] - pub fn insert(&mut self, elem: T) -> bool { - assert!( - elem.index() < self.domain_size, - "inserting element at index {} but domain size is {}", - elem.index(), - self.domain_size, - ); - let (word_index, mask) = word_index_and_mask(elem); - let word_ref = &mut self.words[word_index]; - let word = *word_ref; - let new_word = word | mask; - *word_ref = new_word; - new_word != word - } - - #[inline] - pub fn insert_range(&mut self, elems: impl RangeBounds) { - let Some((start, end)) = inclusive_start_end(elems, self.domain_size) else { - return; - }; - - let (start_word_index, start_mask) = word_index_and_mask(start); - let (end_word_index, end_mask) = word_index_and_mask(end); - - // Set all words in between start and end (exclusively of both). - for word_index in (start_word_index + 1)..end_word_index { - self.words[word_index] = !0; - } - - if start_word_index != end_word_index { - // Start and end are in different words, so we handle each in turn. - // - // We set all leading bits. This includes the start_mask bit. 
- self.words[start_word_index] |= !(start_mask - 1); - // And all trailing bits (i.e. from 0..=end) in the end word, - // including the end. - self.words[end_word_index] |= end_mask | (end_mask - 1); - } else { - self.words[start_word_index] |= end_mask | (end_mask - start_mask); - } - } - - /// Sets all bits to true. - pub fn insert_all(&mut self) { - self.words.fill(!0); - self.clear_excess_bits(); - } - - /// Returns `true` if the set has changed. - #[inline] - pub fn remove(&mut self, elem: T) -> bool { - assert!(elem.index() < self.domain_size); - let (word_index, mask) = word_index_and_mask(elem); - let word_ref = &mut self.words[word_index]; - let word = *word_ref; - let new_word = word & !mask; - *word_ref = new_word; - new_word != word - } - - /// Iterates over the indices of set bits in a sorted order. - #[inline] - pub fn iter(&self) -> BitIter<'_, T> { - BitIter::new(&self.words) - } - - pub fn last_set_in(&self, range: impl RangeBounds) -> Option { - let (start, end) = inclusive_start_end(range, self.domain_size)?; - let (start_word_index, _) = word_index_and_mask(start); - let (end_word_index, end_mask) = word_index_and_mask(end); - - let end_word = self.words[end_word_index] & (end_mask | (end_mask - 1)); - if end_word != 0 { - let pos = max_bit(end_word) + WORD_BITS * end_word_index; - if start <= pos { - return Some(T::new(pos)); - } - } - - // We exclude end_word_index from the range here, because we don't want - // to limit ourselves to *just* the last word: the bits set it in may be - // after `end`, so it may not work out. - if let Some(offset) = - self.words[start_word_index..end_word_index].iter().rposition(|&w| w != 0) - { - let word_idx = start_word_index + offset; - let start_word = self.words[word_idx]; - let pos = max_bit(start_word) + WORD_BITS * word_idx; - if start <= pos { - return Some(T::new(pos)); - } - } - - None - } - - bit_relations_inherent_impls! {} - - /// Sets `self = self | !other`. - /// - /// FIXME: Incorporate this into [`BitRelations`] and fill out - /// implementations for other bitset types, if needed. - pub fn union_not(&mut self, other: &DenseBitSet) { - assert_eq!(self.domain_size, other.domain_size); - - // FIXME(Zalathar): If we were to forcibly _set_ all excess bits before - // the bitwise update, and then clear them again afterwards, we could - // quickly and accurately detect whether the update changed anything. - // But that's only worth doing if there's an actual use-case. - - bitwise(&mut self.words, &other.words, |a, b| a | !b); - // The bitwise update `a | !b` can result in the last word containing - // out-of-domain bits, so we need to clear them. 
- self.clear_excess_bits(); - } -} - -// dense REL dense -impl BitRelations> for DenseBitSet { - fn union(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a | b) - } - - fn subtract(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a & !b) - } - - fn intersect(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a & b) - } -} - -impl From> for DenseBitSet { - fn from(bit_set: GrowableBitSet) -> Self { - bit_set.bit_set - } -} - -impl Clone for DenseBitSet { - fn clone(&self) -> Self { - DenseBitSet { - domain_size: self.domain_size, - words: self.words.clone(), - marker: PhantomData, - } - } - - fn clone_from(&mut self, from: &Self) { - self.domain_size = from.domain_size; - self.words.clone_from(&from.words); - } -} - -impl fmt::Debug for DenseBitSet { - fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { - w.debug_list().entries(self.iter()).finish() - } -} - -impl ToString for DenseBitSet { - fn to_string(&self) -> String { - let mut result = String::new(); - let mut sep = '['; - - // Note: this is a little endian printout of bytes. - - // i tracks how many bits we have printed so far. - let mut i = 0; - for word in &self.words { - let mut word = *word; - for _ in 0..WORD_BYTES { - // for each byte in `word`: - let remain = self.domain_size - i; - // If less than a byte remains, then mask just that many bits. - let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF }; - assert!(mask <= 0xFF); - let byte = word & mask; - - result.push_str(&format!("{sep}{byte:02x}")); - - if remain <= 8 { - break; - } - word >>= 8; - i += 8; - sep = '-'; - } - sep = '|'; - } - result.push(']'); - - result - } -} - -pub struct BitIter<'a, T: Idx> { - /// A copy of the current word, but with any already-visited bits cleared. - /// (This lets us use `trailing_zeros()` to find the next set bit.) When it - /// is reduced to 0, we move onto the next word. - word: Word, - - /// The offset (measured in bits) of the current word. - offset: usize, - - /// Underlying iterator over the words. - iter: slice::Iter<'a, Word>, - - marker: PhantomData, -} - -impl<'a, T: Idx> BitIter<'a, T> { - #[inline] - fn new(words: &'a [Word]) -> BitIter<'a, T> { - // We initialize `word` and `offset` to degenerate values. On the first - // call to `next()` we will fall through to getting the first word from - // `iter`, which sets `word` to the first word (if there is one) and - // `offset` to 0. Doing it this way saves us from having to maintain - // additional state about whether we have started. - BitIter { - word: 0, - offset: usize::MAX - (WORD_BITS - 1), - iter: words.iter(), - marker: PhantomData, - } - } -} - -impl<'a, T: Idx> Iterator for BitIter<'a, T> { - type Item = T; - fn next(&mut self) -> Option { - loop { - if self.word != 0 { - // Get the position of the next set bit in the current word, - // then clear the bit. - let bit_pos = self.word.trailing_zeros() as usize; - self.word ^= 1 << bit_pos; - return Some(T::new(bit_pos + self.offset)); - } - - // Move onto the next word. `wrapping_add()` is needed to handle - // the degenerate initial value given to `offset` in `new()`. 
- self.word = *self.iter.next()?; - self.offset = self.offset.wrapping_add(WORD_BITS); - } - } -} +use bit_relations_inherent_impls; /// A fixed-size bitset type with a partially dense, partially sparse /// representation. The bitset is broken into chunks, and chunks that are all @@ -727,7 +368,7 @@ impl ChunkedBitSet { Some(Ones(chunk_domain_size)) => ChunkIter::Ones(0..*chunk_domain_size as usize), Some(Mixed(chunk_domain_size, _, words)) => { let num_words = num_words(*chunk_domain_size as usize); - ChunkIter::Mixed(BitIter::new(&words[0..num_words])) + ChunkIter::Mixed(BitIter::from_slice(&words[0..num_words])) } None => ChunkIter::Finished, } @@ -771,8 +412,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -847,8 +488,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -898,8 +539,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -920,48 +561,6 @@ impl BitRelations> for ChunkedBitSet { } } -impl BitRelations> for DenseBitSet { - fn union(&mut self, other: &ChunkedBitSet) -> bool { - sequential_update(|elem| self.insert(elem), other.iter()) - } - - fn subtract(&mut self, _other: &ChunkedBitSet) -> bool { - unimplemented!("implement if/when necessary"); - } - - fn intersect(&mut self, other: &ChunkedBitSet) -> bool { - assert_eq!(self.domain_size(), other.domain_size); - let mut changed = false; - for (i, chunk) in other.chunks.iter().enumerate() { - let mut words = &mut self.words[i * CHUNK_WORDS..]; - if words.len() > CHUNK_WORDS { - words = &mut words[..CHUNK_WORDS]; - } - match chunk { - Zeros(..) => { - for word in words { - if *word != 0 { - changed = true; - *word = 0; - } - } - } - Ones(..) => (), - Mixed(_, _, data) => { - for (i, word) in words.iter_mut().enumerate() { - let new_val = *word & data[i]; - if new_val != *word { - changed = true; - *word = new_val; - } - } - } - } - } - changed - } -} - impl Clone for ChunkedBitSet { fn clone(&self) -> Self { ChunkedBitSet { @@ -1080,15 +679,6 @@ enum ChunkIter<'a> { Finished, } -// Applies a function to mutate a bitset, and returns true if any -// of the applications return true -fn sequential_update( - mut self_update: impl FnMut(T) -> bool, - it: impl Iterator, -) -> bool { - it.fold(false, |changed, elem| self_update(elem) | changed) -} - impl fmt::Debug for ChunkedBitSet { fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { w.debug_list().entries(self.iter()).finish() @@ -1108,15 +698,16 @@ impl fmt::Debug for ChunkedBitSet { /// "changed" return value unreliable, because the change might have only /// affected excess bits. 
#[inline] -fn bitwise(out_vec: &mut [Word], in_vec: &[Word], op: Op) -> bool -where - Op: Fn(Word, Word) -> Word, -{ - assert_eq!(out_vec.len(), in_vec.len()); +fn bitwise<'a>( + out: impl ExactSizeIterator, + in_: impl ExactSizeIterator, + op: impl Fn(Word, Word) -> Word, +) -> bool { + assert_eq!(out.len(), in_.len()); let mut changed = 0; - for (out_elem, in_elem) in iter::zip(out_vec, in_vec) { + for (out_elem, in_elem) in iter::zip(out, in_) { let old_val = *out_elem; - let new_val = op(old_val, *in_elem); + let new_val = op(old_val, in_elem); *out_elem = new_val; // This is essentially equivalent to a != with changed being a bool, but // in practice this code gets auto-vectorized by the compiler for most @@ -1161,15 +752,6 @@ pub enum MixedBitSet { Large(ChunkedBitSet), } -impl MixedBitSet { - pub fn domain_size(&self) -> usize { - match self { - MixedBitSet::Small(set) => set.domain_size(), - MixedBitSet::Large(set) => set.domain_size(), - } - } -} - impl MixedBitSet { #[inline] pub fn new_empty(domain_size: usize) -> MixedBitSet { @@ -1204,10 +786,15 @@ impl MixedBitSet { } } - pub fn insert_all(&mut self) { + /// Insert `0..domain_size` in the set. + /// + /// We would like an insert all function that doesn't require the domain size, but the exact + /// domain size is not stored in the `Small` variant, so that is not possible. + #[inline] + pub fn insert_all(&mut self, domain_size: usize) { match self { - MixedBitSet::Small(set) => set.insert_all(), - MixedBitSet::Large(set) => set.insert_all(), + Self::Small(set) => set.insert_all(domain_size), + Self::Large(set) => set.insert_all(), } } @@ -1304,87 +891,6 @@ impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> { } } -/// A resizable bitset type with a dense representation. -/// -/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also -/// just be `usize`. -/// -/// All operations that involve an element will panic if the element is equal -/// to or greater than the domain size. -#[derive(Clone, Debug, PartialEq)] -pub struct GrowableBitSet { - bit_set: DenseBitSet, -} - -impl Default for GrowableBitSet { - fn default() -> Self { - GrowableBitSet::new_empty() - } -} - -impl GrowableBitSet { - /// Ensure that the set can hold at least `min_domain_size` elements. - pub fn ensure(&mut self, min_domain_size: usize) { - if self.bit_set.domain_size < min_domain_size { - self.bit_set.domain_size = min_domain_size; - } - - let min_num_words = num_words(min_domain_size); - if self.bit_set.words.len() < min_num_words { - self.bit_set.words.resize(min_num_words, 0) - } - } - - pub fn new_empty() -> GrowableBitSet { - GrowableBitSet { bit_set: DenseBitSet::new_empty(0) } - } - - pub fn with_capacity(capacity: usize) -> GrowableBitSet { - GrowableBitSet { bit_set: DenseBitSet::new_empty(capacity) } - } - - /// Returns `true` if the set has changed. - #[inline] - pub fn insert(&mut self, elem: T) -> bool { - self.ensure(elem.index() + 1); - self.bit_set.insert(elem) - } - - /// Returns `true` if the set has changed. 
- #[inline] - pub fn remove(&mut self, elem: T) -> bool { - self.ensure(elem.index() + 1); - self.bit_set.remove(elem) - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.bit_set.is_empty() - } - - #[inline] - pub fn contains(&self, elem: T) -> bool { - let (word_index, mask) = word_index_and_mask(elem); - self.bit_set.words.get(word_index).is_some_and(|word| (word & mask) != 0) - } - - #[inline] - pub fn iter(&self) -> BitIter<'_, T> { - self.bit_set.iter() - } - - #[inline] - pub fn len(&self) -> usize { - self.bit_set.count() - } -} - -impl From> for GrowableBitSet { - fn from(bit_set: DenseBitSet) -> Self { - Self { bit_set } - } -} - /// A fixed-size 2D bit matrix type with a dense representation. /// /// `R` and `C` are index types used to identify rows and columns respectively; @@ -1416,14 +922,17 @@ impl BitMatrix { } /// Creates a new matrix, with `row` used as the value for every row. - pub fn from_row_n(row: &DenseBitSet, num_rows: usize) -> BitMatrix { - let num_columns = row.domain_size(); + pub fn from_row_n( + row: &DenseBitSet, + num_rows: usize, + num_columns: usize, + ) -> BitMatrix { let words_per_row = num_words(num_columns); - assert_eq!(words_per_row, row.words.len()); + assert_eq!(words_per_row, row.words().len()); BitMatrix { num_rows, num_columns, - words: iter::repeat(&row.words).take(num_rows).flatten().cloned().collect(), + words: iter::repeat_with(|| row.words()).take(num_rows).flatten().collect(), marker: PhantomData, } } @@ -1516,9 +1025,9 @@ impl BitMatrix { /// returns `true` if anything changed. pub fn union_row_with(&mut self, with: &DenseBitSet, write: R) -> bool { assert!(write.index() < self.num_rows); - assert_eq!(with.domain_size(), self.num_columns); + assert!(with.capacity() >= self.num_columns); let (write_start, write_end) = self.range(write); - bitwise(&mut self.words[write_start..write_end], &with.words, |a, b| a | b) + bitwise(self.words[write_start..write_end].iter_mut(), with.words(), |a, b| a | b) } /// Sets every cell in `row` to true. @@ -1542,7 +1051,7 @@ impl BitMatrix { pub fn iter(&self, row: R) -> BitIter<'_, C> { assert!(row.index() < self.num_rows); let (start, end) = self.range(row); - BitIter::new(&self.words[start..end]) + BitIter::from_slice(&self.words[start..end]) } /// Returns the number of elements in `row`. @@ -1657,11 +1166,6 @@ impl SparseBitMatrix { } } - /// Insert all bits in the given row. - pub fn insert_all_into_row(&mut self, row: R) { - self.ensure_row(row).insert_all(); - } - pub fn rows(&self) -> impl Iterator { self.rows.indices() } @@ -1754,11 +1258,6 @@ fn clear_excess_bits_in_final_word(domain_size: usize, words: &mut [Word]) { } } -#[inline] -fn max_bit(word: Word) -> usize { - WORD_BITS - 1 - word.leading_zeros() as usize -} - /// Integral type used to represent the bit set. 
pub trait FiniteBitSetTy: BitAnd diff --git a/compiler/rustc_index/src/bit_set/dense_bit_set.rs b/compiler/rustc_index/src/bit_set/dense_bit_set.rs new file mode 100644 index 0000000000000..7ff6ef285b3ea --- /dev/null +++ b/compiler/rustc_index/src/bit_set/dense_bit_set.rs @@ -0,0 +1,1282 @@ +use std::alloc::{Layout, alloc, alloc_zeroed, dealloc, handle_alloc_error, realloc}; +use std::hash::{Hash, Hasher}; +use std::iter::FusedIterator; +use std::marker::PhantomData; +use std::mem::ManuallyDrop; +use std::ops::{Range, RangeInclusive}; +use std::ptr::NonNull; +use std::{fmt, iter, slice}; + +use itertools::Either; +#[cfg(feature = "nightly")] +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +use super::{ + BitRelations, CHUNK_WORDS, Chunk, ChunkedBitSet, WORD_BITS, Word, word_index_and_mask, +}; +use crate::Idx; + +/// A fixed-size bitset type with a dense representation, using only one [`Word`] on the stack. +/// +/// This bit set occupies only a single [`Word`] of stack space. It can represent a domain size +/// of up to `[WORD_BITS] - 1` directly inline. If the domain size exceeds this limit, it instead +/// becomes a pointer to a sequence of [`Word`]s on the heap. This makes it very efficient for +/// domain sizes smaller than `[WORD_BITS]`. +/// +/// Additionally, if the set does not fit in one [`Word`], there is a special inline +/// variant for the empty set. In this case, the domain size is stored inline along with a few +/// bits indicating that the set is empty. Allocation is deferred until needed, such as on +/// the first insert or remove operation. This avoids the need to wrap a lazily initialised bit set +/// in a [`OnceCell`](std::cell::OnceCell) or an [`Option`]—you can simply create an empty set and +/// populate it if needed. +/// +/// Note 1: Since this bitset is dense, if your domain is large and/or relatively homogeneous (e.g. +/// long runs of set or unset bits), it may be more efficient to use a +/// [`MixedBitSet`](crate::bit_set::MixedBitSet) or an +/// [`IntervalSet`](crate::interval::IntervalSet), which are better suited for sparse or highly +/// compressible domains. +/// +/// Note 2: Use [`GrowableBitSet`] if you require support for resizing after creation. +/// +/// `T` is an index type—typically a newtyped `usize` wrapper, but it may also simply be `usize`. +/// +/// Any operation involving an element may panic if the element is equal to or greater than the +/// domain size. Operations involving two bitsets may panic if their domain sizes differ. Panicking +/// is not garranteed though as we store the domain size rounded up to the next multiple of +/// [`WORD_BITS`]. +#[repr(C)] +pub union DenseBitSet { + /// The bit set fits in a single [`Word`] stored inline on the stack. + /// + /// The most significant bit is set to 1 to distinguish this from the other variants. You + /// must never change that "tag bit" after the bit set has been created. + /// + /// The remaining bits makes up the bit set. The exact domain size is not stored. + inline: Word, + + /// The bit set doesn't fit in a single word, but is empty and not yet allocated. + /// + /// The first (most significant) two bits are set to `[0, 1]` to distinguish this variant + /// from others. This tag is stored in [`Self::EMPTY_UNALLOCATED_TAG_BITS`]. The remaining bits + /// hold the domain size (capacity) **in words** of the set, which is needed if the set is + /// eventually allocated. 
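As an aside (not part of the patch): a standalone sketch of the tag-bit scheme the doc comment above describes for the new `DenseBitSet` union, assuming `Word` is `u64` as in `rustc_index`. The constants mirror `IS_INLINE_TAG_BIT` and `EMPTY_UNALLOCATED_TAG_BITS` but are redefined locally so the snippet compiles on its own.

```rust
type Word = u64;
const WORD_BITS: usize = Word::BITS as usize;

// MSB set => the whole set lives inline in this one word.
const IS_INLINE_TAG_BIT: Word = 1 << (WORD_BITS - 1);
// Top two bits `01` => empty and not yet allocated; the low bits hold the word count.
const EMPTY_UNALLOCATED_TAG_BITS: usize = 0b01 << (usize::BITS - 2);

fn main() {
    // Inline empty set: only the tag bit is set.
    let inline_empty: Word = IS_INLINE_TAG_BIT;
    assert_ne!(inline_empty & IS_INLINE_TAG_BIT, 0);

    // Empty, unallocated set that would need 4 words once allocated.
    let empty_unallocated: usize = EMPTY_UNALLOCATED_TAG_BITS | 4;
    assert_eq!(empty_unallocated >> (usize::BITS - 2), 0b01);
    assert_eq!(empty_unallocated & !EMPTY_UNALLOCATED_TAG_BITS, 4);

    // A heap pointer stored shifted right by two always has its top two bits clear.
    let shifted_ptr: usize = 0x7f00_beef_usize >> 2;
    assert_eq!(shifted_ptr >> (usize::BITS - 2), 0);
}
```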
+ /// + /// Note that because the capacity is stored in words, not in bits, there is plenty of room + /// for the two tag bits. + empty_unallocated: usize, + + /// The bit set is stored on the heap. + /// + /// The two most significant bits are set to zero if this field is active. + on_heap: ManuallyDrop, + + /// This variant will never be created. + marker: PhantomData, +} + +impl DenseBitSet { + /// The maximum domain size that could be stored inlined on the stack. + pub const INLINE_CAPACITY: usize = WORD_BITS - 1; + + /// A [`Word`] with the most significant bit set. That is the tag bit telling that the set is + /// inlined. + const IS_INLINE_TAG_BIT: Word = 0x1 << (WORD_BITS - 1); + + /// The tag for the `empty_unallocated` variant. The two most significant bits are + /// `[0, 1]`. + const EMPTY_UNALLOCATED_TAG_BITS: usize = 0b01 << (usize::BITS - 2); + + /// Create a new empty bit set with a given domain_size. + /// + /// If `domain_size` is <= [`Self::INLINE_CAPACITY`], then it is stored inline on the stack, + /// otherwise it is stored on the heap. + #[inline] + pub fn new_empty(domain_size: usize) -> Self { + if domain_size <= Self::INLINE_CAPACITY { + // The first bit is set to indicate the union variant. + Self { inline: Self::IS_INLINE_TAG_BIT } + } else { + let num_words = domain_size.div_ceil(WORD_BITS); + debug_assert!(num_words.leading_zeros() >= 2); + Self { empty_unallocated: Self::EMPTY_UNALLOCATED_TAG_BITS | num_words } + } + } + + /// Create a new filled bit set. + #[inline] + pub fn new_filled(domain_size: usize) -> Self { + if domain_size <= Self::INLINE_CAPACITY { + Self { + inline: Word::MAX.unbounded_shr((WORD_BITS - domain_size) as u32) + | Self::IS_INLINE_TAG_BIT, + } + } else { + let num_words = domain_size.div_ceil(WORD_BITS); + let mut on_heap = BitSetOnHeap::new_empty(num_words); + let words = on_heap.as_mut_slice(); + for word in words.iter_mut() { + *word = Word::MAX; + } + // Remove excessive bits on the last word. + // Trust me: this mask is correct. + let last_word_mask = Word::MAX.wrapping_shr(domain_size.wrapping_neg() as u32); + *words.last_mut().unwrap() &= last_word_mask; + Self { on_heap: ManuallyDrop::new(on_heap) } + } + } + + /// Check if `self` is inlined. + // If this function returns `true`, it is safe to assume `self.inline`. Else, it is safe to + // assume `self.empty_unallocated`, or `self.on_heap`. + #[inline(always)] + pub fn is_inline(&self) -> bool { + // We check if the first bit is set. If so, it is inlined, otherwise it is on the heap. + (unsafe { self.inline } & Self::IS_INLINE_TAG_BIT) != 0 + } + + /// Check if `self` has a too large domain to be stored inline, is empty, and is not yet + /// allocated. + // If this function returns `true`, it is safe to assume `self.empty_unallocated`. Else, it is + // safe to assume `self.inline`, or `self.on_heap`. + #[inline(always)] + pub const fn is_empty_unallocated(&self) -> bool { + const MASK: usize = usize::MAX << usize::BITS - 2; + (unsafe { self.empty_unallocated } & MASK) == Self::EMPTY_UNALLOCATED_TAG_BITS + } + + /// Check if `self` is `empty_unallocated` and if so return the number of words required to + /// store the expected capacity. + // If this function returns `true`, it is safe to assume `self.empty_unallocated`. Else, it is + // safe to assume `self.inline`, or `self.on_heap`. 
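Not part of the patch: a quick standalone check of the last-word mask used by `new_filled` above ("Trust me: this mask is correct"), assuming a 64-bit `Word`. Because `wrapping_shr` reduces the shift amount modulo the bit width, `domain_size.wrapping_neg()` behaves like `(-domain_size) mod 64`, which yields exactly the low `domain_size % 64` bits (or all bits when the size is a multiple of 64).

```rust
fn main() {
    const WORD_BITS: usize = u64::BITS as usize;
    for domain_size in [65usize, 100, 128, 190] {
        let mask = u64::MAX.wrapping_shr(domain_size.wrapping_neg() as u32);
        let rem = domain_size % WORD_BITS;
        let expected = if rem == 0 { u64::MAX } else { (1u64 << rem) - 1 };
        assert_eq!(mask, expected, "mask mismatch for domain_size = {domain_size}");
    }
}
```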
+ #[inline(always)] + pub const fn empty_unallocated_get_num_words(&self) -> Option { + if self.is_empty_unallocated() { + Some(unsafe { self.empty_unallocated } ^ Self::EMPTY_UNALLOCATED_TAG_BITS) + } else { + None + } + } + + /// Check if `self` is allocated on the heap and return a reference to it in that case. + fn on_heap(&self) -> Option<&BitSetOnHeap> { + let self_word = unsafe { self.inline }; + // Check if the two most significant bits are 0. + if self_word & Word::MAX >> 2 == self_word { Some(unsafe { &self.on_heap }) } else { None } + } + + /// Check if `self` is allocated on the heap and return a mutable reference to it in that case. + fn on_heap_mut(&mut self) -> Option<&mut ManuallyDrop> { + let self_word = unsafe { self.inline }; + // Check if the two most significant bits are 0. + if self_word & Word::MAX >> 2 == self_word { + Some(unsafe { &mut self.on_heap }) + } else { + None + } + } + + /// If `self` is `empty_unallocated`, allocate it, otherwise return `self.on_heap_mut()`. + fn on_heap_get_or_alloc(&mut self) -> &mut BitSetOnHeap { + if let Some(num_words) = self.empty_unallocated_get_num_words() { + *self = Self { on_heap: ManuallyDrop::new(BitSetOnHeap::new_empty(num_words)) }; + unsafe { &mut self.on_heap } + } else { + self.on_heap_mut().unwrap() + } + } + + /// Get the capacity of this set. This is >= the initial domain size. + #[inline(always)] + pub(super) fn capacity(&self) -> usize { + if self.is_inline() { + Self::INLINE_CAPACITY + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + num_words * WORD_BITS + } else { + self.on_heap().unwrap().capacity() + } + } + + /// Checks if the bit set is empty. + #[inline(always)] + pub fn is_empty(&self) -> bool { + if self.is_inline() { + let x = unsafe { self.inline }; + x == Self::IS_INLINE_TAG_BIT + } else if self.is_empty_unallocated() { + true + } else { + self.on_heap().unwrap().is_empty() + } + } + + /// Clear the set. + #[inline(always)] + pub fn clear(&mut self) { + if self.is_inline() { + self.inline = Self::IS_INLINE_TAG_BIT + } else if let Some(on_heap) = self.on_heap_mut() { + for word in on_heap.as_mut_slice() { + *word = 0x0; + } + } + } + + /// Get an iterator of all words making up the set. + pub(super) fn words(&self) -> impl ExactSizeIterator { + if self.is_inline() { + let word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + Either::Left(iter::once(word)) + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + Either::Right(Either::Left(iter::repeat_n(0, num_words))) + } else { + Either::Right(Either::Right(self.on_heap().unwrap().as_slice().iter().copied())) + } + } + + /// Checks if `self` is a (non-strict) superset of `other`. + /// + /// May panic if `self` and other have different sizes. + #[inline(always)] + pub fn superset(&self, other: &Self) -> bool { + // Function to check that a usize is a superset of another. 
+ let word_is_superset = |x: Word, other: Word| (!x & other) == 0; + + if self.is_inline() { + let x = unsafe { self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + word_is_superset(x, y) + } else if other.is_empty_unallocated() { + true + } else { + let other_on_heap = other.on_heap().unwrap(); + if self.is_empty_unallocated() { + other_on_heap.is_empty() + } else { + let on_heap = self.on_heap().unwrap(); + let self_slice = on_heap.as_slice(); + let other_slice = other_on_heap.as_slice(); + debug_assert_eq!( + self_slice.len(), + other_slice.len(), + "bit sets have different domain sizes" + ); + self_slice.iter().zip(other_slice).all(|(&x, &y)| (!x & y) == 0) + } + } + } + + /// Count the number of set bits in the set. + #[inline(always)] + pub fn count(&self) -> usize { + if self.is_inline() { + let x = unsafe { self.inline }; + x.count_ones() as usize - 1 + } else if self.is_empty_unallocated() { + 0 + } else { + self.on_heap().unwrap().as_slice().iter().map(|w| w.count_ones() as usize).sum() + } + } + + /// Returns an iterator over the indices for all elements in this set. + #[inline(always)] + pub fn iter_usizes(&self) -> BitIter<'_, usize> { + if self.is_inline() { + let x = unsafe { self.inline }; + // Remove the tag bit. + let without_tag_bit = x ^ Self::IS_INLINE_TAG_BIT; + BitIter::from_single_word(without_tag_bit) + } else if let Some(on_heap) = self.on_heap() { + BitIter::from_slice(on_heap.as_slice()) + } else { + debug_assert!(self.is_empty_unallocated()); + BitIter::from_single_word(0) + } + } + + /// Insert the elem with index `idx`. Returns `true` if the set has changed. + #[inline(always)] + fn insert_usize(&mut self, idx: usize) -> bool { + // Insert the `i`th bit in a word and return `true` if it changed. + let insert_bit = |word: &mut Word, bit_idx: u32| { + let mask = 0x01 << bit_idx; + let old = *word; + *word |= mask; + *word != old + }; + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + insert_bit(x, idx as u32) + } else { + let words = self.on_heap_get_or_alloc().as_mut_slice(); + + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = &mut words[word_idx]; + insert_bit(word, bit_idx) + } + } + + /// Insert `0..domain_size` in the set. + /// + /// We would like an insert all function that doesn't require the domain size, but the exact + /// domain size is not stored so that is not possible. + #[inline(always)] + pub fn insert_all(&mut self, domain_size: usize) { + if self.is_inline() { + debug_assert!(domain_size <= Self::INLINE_CAPACITY); + unsafe { + self.inline |= Word::MAX.unbounded_shr(WORD_BITS as u32 - domain_size as u32) + }; + } else { + let on_heap = self.on_heap_get_or_alloc(); + debug_assert!(on_heap.capacity() >= domain_size, "domain size too big"); + let words = on_heap.as_mut_slice(); + + let (end_word_index, end_mask) = word_index_and_mask(domain_size - 1); + + for word_index in 0..end_word_index { + words[word_index] = Word::MAX; + } + + words[end_word_index] |= end_mask | (end_mask - 1); + } + } + + /// Sets `self = self | !other` for all elements less than `domain_size`. 
+ #[inline(always)] + pub fn union_not(&mut self, other: &Self, domain_size: usize) { + if self.is_inline() { + assert!(other.is_inline()); + + let self_word = unsafe { &mut self.inline }; + let other_word = unsafe { other.inline }; + + debug_assert!(domain_size <= Self::INLINE_CAPACITY); + + *self_word |= !other_word & Word::MAX.unbounded_shr((WORD_BITS - domain_size) as u32); + } else if other.is_empty_unallocated() { + self.insert_all(domain_size); + } else { + let self_words = self.on_heap_get_or_alloc().as_mut_slice(); + let other_words = other.on_heap().unwrap().as_slice(); + + // Set all but the last word if domain_size is not divisible by `WORD_BITS`. + for (self_word, other_word) in + self_words.iter_mut().zip(other_words).take(domain_size / WORD_BITS) + { + *self_word |= !other_word; + } + + let remaining_bits = domain_size % WORD_BITS; + if remaining_bits > 0 { + let last_idx = domain_size / WORD_BITS; + self_words[last_idx] |= !other_words[last_idx] & !(Word::MAX << remaining_bits); + } + } + } + + /// Common function for union/intersection-like operations. + /// + /// This function takes two bit sets—one mutably, one immutably. Neither must be the + /// `empty_unallocated` variant. It asserts that they have the same `domain_size`, then applies a function to + /// each pair of words, effectively performing a zip-like operation. + /// It checks whether `self` has changed; if so, it returns `true`, otherwise `false`. + /// + /// ## Safety + /// + /// - Neither set must be `self.empty_unallocated`. + /// - If the sets are inlined, this will leave the tag bit set to 1. You must not modify it—doing so + /// results in undefined behaviour. This may be inconvenient for operations such as subtraction; + /// in such cases, use `binary_operation_safe` instead. + #[inline(always)] + unsafe fn binary_operation(&mut self, other: &Self, op: impl Fn(&mut Word, Word)) -> bool { + debug_assert!(!self.is_empty_unallocated()); + debug_assert!(!other.is_empty_unallocated()); + + // Apply `op` and return if the word changed. + let apply_and_check_change = |x: &mut Word, y: Word| -> bool { + let old = *x; + op(x, y); + *x != old + }; + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + apply_and_check_change(x, y) + } else { + let self_on_heap = unsafe { &mut self.on_heap }; + assert!(!other.is_inline(), "bit sets has different domain sizes"); + let other_on_heap = unsafe { &other.on_heap }; + let self_slice = self_on_heap.as_mut_slice(); + let other_slice = other_on_heap.as_slice(); + assert_eq!(self_slice.len(), other_slice.len(), "bit sets have different domain sizes"); + let mut has_changed = false; + for (x, y) in self_slice.iter_mut().zip(other_slice) { + has_changed |= apply_and_check_change(x, *y); + } + has_changed + } + } + + /// Similar to [`Self::binary_operation`], but restores the tag bit if it has changed. + /// + /// Note that the tag bit will still be set in the call to `op`, but there is no danger in + /// changing it as it will be restored afterwords. + /// + /// ## Safety + /// + /// Neither set must be `self.empty_unallocated`. 
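An illustrative sketch (not from the patch) of the invariant the SAFETY comments above rely on: OR and AND preserve a tag bit that is set in both operands, while `a & !b` clears it, which is why `subtract` has to go through `binary_operation_safe` and restore the tag afterwards.

```rust
fn main() {
    const TAG: u64 = 1 << 63; // stand-in for IS_INLINE_TAG_BIT
    let a = TAG | 0b1010;
    let b = TAG | 0b0110;
    assert_ne!((a | b) & TAG, 0); // union keeps the tag
    assert_ne!((a & b) & TAG, 0); // intersection keeps the tag
    assert_eq!((a & !b) & TAG, 0); // subtraction erases it, so it must be restored
}
```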
+ #[inline(always)] + unsafe fn binary_operation_safe(&mut self, other: &Self, op: impl Fn(&mut Word, Word)) -> bool { + debug_assert!(!self.is_empty_unallocated()); + debug_assert!(!other.is_empty_unallocated()); + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + + let old = *x; + op(x, y); + *x |= Self::IS_INLINE_TAG_BIT; + old != *x + } else { + let self_on_heap = unsafe { &mut self.on_heap }; + assert!(!other.is_inline(), "bit sets has different domain sizes"); + let other_on_heap = unsafe { &other.on_heap }; + let self_slice = self_on_heap.as_mut_slice(); + let other_slice = other_on_heap.as_slice(); + assert_eq!(self_slice.len(), other_slice.len(), "bit sets have different domain sizes"); + let mut has_changed = false; + for (x, y) in self_slice.iter_mut().zip(other_slice) { + let old = *x; + op(x, *y); + has_changed |= old != *x; + } + has_changed + } + } + + super::bit_relations_inherent_impls! {} +} + +impl BitRelations> for DenseBitSet { + #[inline(always)] + fn union(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() { + debug_assert!(!other.is_inline()); + *self = other.clone(); + !self.is_empty() + } else if other.is_empty_unallocated() { + false + } else { + // SAFETY: The union operation does not remove any bit set to 1, so the tag bit is + // unaffected. + unsafe { self.binary_operation(other, |x, y| *x |= y) } + } + } + + #[inline(always)] + fn intersect(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() { + false + } else if other.is_empty_unallocated() { + debug_assert!(!self.is_inline()); + let was_empty = self.is_empty(); + self.clear(); + !was_empty + } else { + // SAFETY: Since the tag bit is set in both `self` and `other`, the intersection won't + // remove it. + unsafe { self.binary_operation(other, |x, y| *x &= y) } + } + } + + #[inline(always)] + fn subtract(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() || other.is_empty_unallocated() { + false + } else { + unsafe { self.binary_operation_safe(other, |x, y| *x &= !y) } + } + } +} + +impl DenseBitSet { + /// Checks if the bit set contains `elem`. + #[inline(always)] + pub fn contains(&self, elem: T) -> bool { + // Check if the `i`th bit is set in a word. + let contains_bit = |word: Word, bit_idx: u32| { + let mask = 0x01 << bit_idx; + (word & mask) != 0 + }; + + let idx = elem.index(); + if self.is_inline() { + let x = unsafe { self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + contains_bit(x, idx as u32) + } else if let Some(on_heap) = self.on_heap() { + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = on_heap.as_slice()[word_idx]; + contains_bit(word, bit_idx) + } else { + debug_assert!(self.is_empty_unallocated()); + false + } + } + + /// Insert `elem`. Returns `true` if the set has changed. + #[inline(always)] + pub fn insert(&mut self, elem: T) -> bool { + self.insert_usize(elem.index()) + } + + /// Remove `elem`. Returns `true` if the set has changed. + #[inline(always)] + pub fn remove(&mut self, elem: T) -> bool { + // Remove the `i`th bit in a word and return `true` if it changed. 
+ let remove_bit = |word: &mut Word, bit_idx: u32| { + let mask = !(0x01 << bit_idx); + let old = *word; + *word &= mask; + *word != old + }; + + let idx = elem.index(); + if self.is_inline() { + let x = unsafe { &mut self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + remove_bit(x, idx as u32) + } else if let Some(on_heap) = self.on_heap_mut() { + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = &mut on_heap.as_mut_slice()[word_idx]; + remove_bit(word, bit_idx) + } else { + debug_assert!(self.is_empty_unallocated()); + // Nothing to be removed. + false + } + } + + /// Returns an iterator over all elements in this set. + #[inline(always)] + pub fn iter(&self) -> BitIter<'_, T> { + if self.is_inline() { + let x = unsafe { self.inline }; + // Remove the tag bit. + let without_tag_bit = x ^ Self::IS_INLINE_TAG_BIT; + BitIter::from_single_word(without_tag_bit) + } else if let Some(on_heap) = self.on_heap() { + BitIter::from_slice(on_heap.as_slice()) + } else { + debug_assert!(self.is_empty_unallocated()); + BitIter::from_single_word(0) + } + } + + /// Returns `Some(elem)` if the set contains exactly one elemement otherwise returns `None`. + #[inline(always)] + pub fn only_one_elem(&self) -> Option { + if self.is_inline() { + let word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + if word.is_power_of_two() { Some(T::new(word.trailing_zeros() as usize)) } else { None } + } else if self.is_empty_unallocated() { + None + } else { + let words = self.on_heap().unwrap().as_slice(); + let mut found_elem = None; + for (i, &word) in words.iter().enumerate() { + if word > 0 { + if found_elem.is_some() { + return None; + } + if word.is_power_of_two() { + found_elem = + Some(T::new(i * WORD_BITS as usize + word.trailing_zeros() as usize)); + } else { + return None; + } + } + } + found_elem + } + } + + #[inline] + pub fn insert_range(&mut self, range: Range) { + if let Some(end) = range.end.index().checked_sub(1) { + self.insert_range_inclusive(RangeInclusive::new(range.start, Idx::new(end))); + } + } + + #[inline(always)] + pub fn insert_range_inclusive(&mut self, range: RangeInclusive) { + let start = range.start().index(); + let end = range.end().index(); + + if start > end { + return; + } + + if self.is_inline() { + debug_assert!(end < Self::INLINE_CAPACITY); + let mask = (1 << end) | ((1 << end) - (1 << start)); + unsafe { self.inline |= mask }; + } else { + let words = self.on_heap_get_or_alloc().as_mut_slice(); + + let (start_word_index, start_mask) = word_index_and_mask(start); + let (end_word_index, end_mask) = word_index_and_mask(end); + + // Set all words in between start and end (exclusively of both). + for word_index in (start_word_index + 1)..end_word_index { + words[word_index] = !0; + } + + if start_word_index != end_word_index { + // Start and end are in different words, so we handle each in turn. + // + // We set all leading bits. This includes the start_mask bit. + words[start_word_index] |= !(start_mask - 1); + // And all trailing bits (i.e. from 0..=end) in the end word, + // including the end. 
+ words[end_word_index] |= end_mask | (end_mask - 1); + } else { + words[start_word_index] |= end_mask | (end_mask - start_mask); + } + } + } + + #[inline(always)] + pub fn last_set_in(&self, range: RangeInclusive) -> Option { + let start = range.start().index(); + let end = range.end().index(); + + if start > end { + return None; + } + + if self.is_inline() { + debug_assert!(end < Self::INLINE_CAPACITY); + let mut word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + let end_bit = 1 << end; + // Set all bits mor significant than `end_bit` to zero. + word &= end_bit | (end_bit - 1); + if word != 0 { + let pos = max_bit(word); + if start <= pos { Some(T::new(pos)) } else { None } + } else { + None + } + } else if let Some(on_heap) = self.on_heap() { + let words = on_heap.as_slice(); + + let (start_word_index, _) = word_index_and_mask(start); + let (end_word_index, end_mask) = word_index_and_mask(end); + + let end_word = words[end_word_index] & (end_mask | (end_mask - 1)); + if end_word != 0 { + let pos = max_bit(end_word) + WORD_BITS * end_word_index; + if start <= pos { + return Some(T::new(pos)); + } + } + + // We exclude end_word_index from the range here, because we don't want + // to limit ourselves to *just* the last word: the bits set it in may be + // after `end`, so it may not work out. + if let Some(offset) = + words[start_word_index..end_word_index].iter().rposition(|&w| w != 0) + { + let word_idx = start_word_index + offset; + let start_word = words[word_idx]; + let pos = max_bit(start_word) + WORD_BITS * word_idx; + if start <= pos { Some(T::new(pos)) } else { None } + } else { + None + } + } else { + debug_assert!(self.is_empty_unallocated()); + None + } + } +} + +impl BitRelations> for DenseBitSet { + fn union(&mut self, other: &ChunkedBitSet) -> bool { + other.iter().fold(false, |changed, elem| self.insert(elem) || changed) + } + + fn subtract(&mut self, _other: &ChunkedBitSet) -> bool { + unimplemented!("implement if/when necessary"); + } + + fn intersect(&mut self, other: &ChunkedBitSet) -> bool { + if self.is_inline() { + assert!(other.domain_size <= Self::INLINE_CAPACITY); + if other.domain_size == 0 { + return false; + } + + let word = unsafe { &mut self.inline }; + let old_word = *word; + match &other.chunks[0] { + Chunk::Zeros(d) => { + debug_assert_eq!(usize::from(*d), other.domain_size); + let mask = Word::MAX << other.domain_size(); + *word &= mask; + } + Chunk::Ones(_) => (), + Chunk::Mixed(d, _, words) => { + debug_assert_eq!(usize::from(*d), other.domain_size); + *word &= words[0] | Self::IS_INLINE_TAG_BIT; + } + } + *word != old_word + } else if let Some(on_heap) = self.on_heap_mut() { + let all_words = on_heap.as_mut_slice(); + + let mut changed = false; + for (i, chunk) in other.chunks.iter().enumerate() { + let mut words = &mut all_words[i * CHUNK_WORDS..]; + if words.len() > CHUNK_WORDS { + words = &mut words[..CHUNK_WORDS]; + } + match chunk { + Chunk::Zeros(..) => { + for word in words { + if *word != 0 { + changed = true; + *word = 0; + } + } + } + Chunk::Ones(..) 
=> (), + Chunk::Mixed(_, _, data) => { + for (i, word) in words.iter_mut().enumerate() { + let new_val = *word & data[i]; + if new_val != *word { + changed = true; + *word = new_val; + } + } + } + } + } + changed + } else { + debug_assert!(self.is_empty_unallocated()); + false + } + } +} + +#[cfg(feature = "nightly")] +impl Encodable for DenseBitSet { + #[inline(never)] // FIXME: For profiling purposes + fn encode(&self, s: &mut S) { + // The encoding is as follows: + // + // The `inline` and `empty_unallocated` variants are encoded as a single `Word`. Here, we + // consider the `empty_unallocated` variant as the `inline` variant because + // `empty_unallocated: usize`, `inline: Word`, and `usize` is smaller than `Word`. + // + // The `on_heap` variant is encoded as follows: First, the number of `Word`s are encoded + // with a single `Word`. We assert that the two most significant bits of this number are 0 + // to distinguish it from the `inline` and `empty_unallocated` variants. Then all the words are + // encoded in sequence. + + if let Some(on_heap) = self.on_heap() { + let n_words: Word = on_heap.n_words(); + debug_assert_eq!( + n_words >> WORD_BITS - 2, + 0x0, + "the two most significant bits must be 0" + ); + n_words.encode(s); + debug_assert_eq!(n_words as usize, on_heap.as_slice().len()); + for word in on_heap.as_slice().iter() { + word.encode(s); + } + } else { + let word = unsafe { self.inline }; + debug_assert!(word >> WORD_BITS - 2 != 0, "the 2 most significant bits must not be 0"); + word.encode(s); + } + } +} + +#[cfg(feature = "nightly")] +impl Decodable for DenseBitSet { + #[inline(never)] // FIXME: For profiling purposes + fn decode(d: &mut D) -> Self { + // First we read one `Word` and check the variant. + let word = Word::decode(d); + if word >> WORD_BITS - 2 == 0x0 { + // If the two most significant bits are 0, then this is the `on_heap` variant and the + // number of words is encoded by `word`. + let n_words = word as usize; + assert!( + n_words > 0, + "DenseBitSet decoder error: At least one word must be stored with the `on_heap` variant." + ); + let mut on_heap = BitSetOnHeap::new_empty(n_words); + + let words = on_heap.as_mut_slice(); + // All `words` are now initialised to 0x0. + debug_assert_eq!(words.len(), n_words); + + // Decode the words one-by-one. + for word in words.iter_mut() { + *word = Word::decode(d); + } + + DenseBitSet { on_heap: ManuallyDrop::new(on_heap) } + } else { + // Both the `inline` and `empty_unallocated` variants are encoded by one `Word`. We can + // just assume the `inline` variant because the `empty_unallocated` variant is smaller + // and the union is `repr(C)`. + Self { inline: word } + } + } +} + +impl Clone for DenseBitSet { + #[inline(always)] + fn clone(&self) -> Self { + if self.is_inline() { + let inline = unsafe { self.inline }; + Self { inline } + } else if self.is_empty_unallocated() { + let empty_unallocated = unsafe { self.empty_unallocated }; + Self { empty_unallocated } + } else { + let old_on_heap = unsafe { &self.on_heap }; + let on_heap = old_on_heap.clone(); + Self { on_heap } + } + } +} + +impl Drop for DenseBitSet { + #[inline(always)] + fn drop(&mut self) { + // Deallocate if `self` is not inlined. + if let Some(on_heap) = self.on_heap_mut() { + unsafe { + ManuallyDrop::drop(on_heap); + } + } + } +} + +/// A pointer to a dense bit set stored on the heap. +/// +/// This struct is a `usize`, with its two most significant bits always set to 0. 
If the value is +/// shifted left by 2 bits, it yields a pointer to a sequence of words on the heap. The first word +/// in this sequence represents the length—it indicates how many words follow. These subsequent +/// words make up the actual bit set. +/// +/// For example, suppose the bit set should support a domain size of 240 bits. We first determine +/// how many words are needed to store 240 bits—that’s 4 words, assuming `[WORD_BITS] == 64`. +/// The pointer in this struct then points to a sequence of five words allocated on the heap. The +/// first word has the value 4 (the length), and the remaining four words comprise the bit set. +#[repr(transparent)] +struct BitSetOnHeap(usize); + +impl BitSetOnHeap { + fn new_empty(len: usize) -> Self { + debug_assert!(len >= 1); + + // The first word is used to store the total number of words. The rest of the words + // store the bits. + let num_words = len + 1; + + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let ptr = unsafe { alloc_zeroed(layout).cast::() }; + let Some(ptr) = NonNull::::new(ptr) else { + handle_alloc_error(layout); + }; + + // Store the length in the first word. + unsafe { ptr.write(len as Word) }; + + // Convert `ptr` to a `usize` and shift it two bits to the right. + BitSetOnHeap((ptr.as_ptr() as usize) >> 2) + } + + /// Get a slice with all bits in this bit set. + /// + /// Note that the number of bits in the set is rounded up to the next power of `Usize::BITS`. So + /// if the user requested a domain_size of 216 bits, a slice with 4 words will be returned on a + /// 64-bit machine. + #[inline] + fn as_slice(&self) -> &[Word] { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + // The slice starts at the second word. + unsafe { slice::from_raw_parts(ptr.add(1), len) } + } + + /// Get a mutable slice with all bits in this bit set. + /// + /// Note that the number of bits in the set is rounded up to the next power of `Usize::BITS`. So + /// if the user requested a domain_size of 216 bits, a slice with 4 words will be returned on a + /// 64-bit machine. + #[inline] + fn as_mut_slice(&mut self) -> &mut [Word] { + let ptr = (self.0 << 2) as *mut Word; + let len = unsafe { ptr.read() } as usize; + // The slice starts at the second word. + unsafe { slice::from_raw_parts_mut(ptr.add(1), len) } + } + + /// Check if the set is empty. + fn is_empty(&self) -> bool { + self.as_slice().iter().all(|&x| x == 0) + } + + /// Get the number of words. + #[allow(dead_code)] // FIXME + #[inline] + fn n_words(&self) -> Word { + let ptr = (self.0 << 2) as *const Word; + unsafe { ptr.read() } + } + + /// Get the capacity, that is the number of elements that can be stored in this set. + fn capacity(&self) -> usize { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + len * WORD_BITS + } + + /// Make sure the set can hold at least `min_domain_size` elements. Reallocate if necessary. + fn ensure_capacity(&mut self, min_domain_size: usize) { + let len = min_domain_size.div_ceil(WORD_BITS); + + let old_ptr = (self.0 << 2) as *const Word; + let old_len = unsafe { old_ptr.read() } as usize; + + if len <= old_len { + return; + } + + // The first word is used to store the total number of words. The rest of the words + // store the bits. 
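A standalone check (not part of the patch) of the layout arithmetic in the `BitSetOnHeap` example above: with 64-bit words, a 240-bit domain needs four data words, plus one leading word that stores the length.

```rust
fn main() {
    const WORD_BITS: usize = 64;
    let domain_size: usize = 240;
    let data_words = domain_size.div_ceil(WORD_BITS);
    assert_eq!(data_words, 4);
    let total_allocation_words = data_words + 1; // +1 for the leading length word
    assert_eq!(total_allocation_words, 5);
}
```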
+ let num_words = len + 1; + let old_num_words = old_len + 1; + + let new_layout = Layout::array::(num_words).expect("Bit set too large"); + let old_layout = Layout::array::(old_num_words).expect("Bit set too large"); + + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let ptr = + unsafe { realloc(old_ptr as *mut u8, old_layout, new_layout.size()).cast::() }; + let Some(ptr) = NonNull::::new(ptr) else { + handle_alloc_error(new_layout); + }; + + // Store the length in the first word. + unsafe { ptr.write(len as Word) }; + + // Set all the new words to 0. + for word_idx in old_num_words..num_words { + unsafe { ptr.add(word_idx).write(0x0) } + } + + // Convert `ptr` to a `usize` and shift it two bits to the right. + self.0 = (ptr.as_ptr() as usize) >> 2 + } +} + +impl Clone for BitSetOnHeap { + fn clone(&self) -> Self { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + let num_words = len + 1; + + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let new_ptr = unsafe { alloc(layout).cast::() }; + let Some(new_ptr) = NonNull::::new(new_ptr) else { + handle_alloc_error(layout); + }; + + unsafe { ptr.copy_to_nonoverlapping(new_ptr.as_ptr(), num_words) }; + + BitSetOnHeap((new_ptr.as_ptr() as usize) >> 2) + } +} + +impl Drop for BitSetOnHeap { + fn drop(&mut self) { + let ptr = (self.0 << 2) as *mut Word; + + // SAFETY: The first word stores the number of words for the bit set. We have to add 1 + // because the first word storing the length is allocated as well. + let num_words = unsafe { ptr.read() } as usize + 1; + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: We know that `on_heap` has been allocated with the same layout. See the + // `new` method for reference. + unsafe { dealloc(ptr.cast::(), layout) }; + } +} + +pub struct BitIter<'a, T: Idx> { + /// A copy of the current word, but with any already-visited bits cleared. + /// (This lets us use `trailing_zeros()` to find the next set bit.) When it + /// is reduced to 0, we move onto the next word. + word: Word, + + /// The offset (measured in bits) of the current word. + offset: usize, + + /// Underlying iterator over the words. + iter: slice::Iter<'a, Word>, + + marker: PhantomData, +} + +impl<'a, T: Idx> BitIter<'a, T> { + pub(super) fn from_slice(words: &'a [Word]) -> Self { + // We initialize `word` and `offset` to degenerate values. On the first + // call to `next()` we will fall through to getting the first word from + // `iter`, which sets `word` to the first word (if there is one) and + // `offset` to 0. Doing it this way saves us from having to maintain + // additional state about whether we have started. + Self { + word: 0, + offset: usize::MAX - (WORD_BITS - 1), + iter: words.iter(), + marker: PhantomData, + } + } + + #[inline(always)] + fn from_single_word(word: Word) -> Self { + Self { word, offset: 0, iter: [].iter(), marker: PhantomData } + } +} + +impl<'a, T: Idx> Iterator for BitIter<'a, T> { + type Item = T; + + #[inline(always)] + fn next(&mut self) -> Option { + loop { + if self.word != 0 { + // Get the position of the next set bit in the current word, + // then clear the bit. + let bit_pos = self.word.trailing_zeros() as usize; + self.word ^= 0x01 << bit_pos; + return Some(T::new(bit_pos + self.offset)); + } + + // Move onto the next word. 
`wrapping_add()` is needed to handle + // the degenerate initial value given to `offset` in `new()`. + self.word = *self.iter.next()?; + self.offset = self.offset.wrapping_add(WORD_BITS); + } + } +} + +impl<'a, T: Idx> FusedIterator for BitIter<'a, T> {} + +impl fmt::Debug for DenseBitSet { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + w.debug_list().entries(self.iter()).finish() + } +} + +impl PartialEq for DenseBitSet { + #[inline] + fn eq(&self, other: &Self) -> bool { + if self.is_inline() { + if other.is_inline() { + unsafe { self.inline == other.inline } + } else if other.is_empty_unallocated() { + self.is_empty() + } else { + let other_words = other.on_heap().unwrap().as_slice(); + let self_word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + other_words[0] == self_word && other_words[1..].iter().all(|&w| w == 0) + } + } else if self.is_empty_unallocated() { + other.is_empty() + } else { + let self_words = self.on_heap().unwrap().as_slice(); + if other.is_empty_unallocated() { + self_words.iter().all(|&w| w == 0) + } else if other.is_inline() { + let other_word = unsafe { other.inline } ^ Self::IS_INLINE_TAG_BIT; + self_words[0] == other_word && self_words[1..].iter().all(|&w| w == 0) + } else { + let mut self_words = self_words.iter(); + let mut other_words = other.on_heap().unwrap().as_slice().iter(); + loop { + match (self_words.next(), other_words.next()) { + (Some(w1), Some(w2)) if w1 == w2 => (), + (Some(_), Some(_)) => break false, + (Some(0), None) | (None, Some(0)) => (), + (Some(_), None) | (None, Some(_)) => break false, + (None, None) => break true, + } + } + } + } + } +} + +impl Eq for DenseBitSet {} + +impl Hash for DenseBitSet { + #[inline] + fn hash(&self, hasher: &mut H) { + if self.is_inline() { + let inline = unsafe { self.inline }; + inline.hash(hasher); + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + // Now we hash 0 for `num_words` times so that this hash should be equal to a cleared + // set with the `on_heap` variant. + for _ in 0..num_words { + let zero_word: Word = 0x0; + zero_word.hash(hasher); + } + } else { + let words = self.on_heap().unwrap().as_slice(); + for word in words { + word.hash(hasher); + } + } + } +} + +/// A resizable bitset type with a dense representation. +/// +/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also +/// just be `usize`. +/// +/// All operations that involve an element will panic if the element is equal +/// to or greater than the domain size. +#[derive(Clone, PartialEq)] +pub struct GrowableBitSet { + bit_set: DenseBitSet, +} + +impl Default for GrowableBitSet { + fn default() -> Self { + GrowableBitSet::new_empty() + } +} + +impl GrowableBitSet { + /// Ensure that the set can hold at least `min_domain_size` elements. + pub fn ensure(&mut self, min_domain_size: usize) { + if min_domain_size <= self.bit_set.capacity() { + return; + } + + if self.bit_set.is_inline() { + // The set must change from being inlined to allocate on the heap. + debug_assert!(min_domain_size > DenseBitSet::::INLINE_CAPACITY); + + let mut new_bit_set = DenseBitSet::new_empty(min_domain_size); + if !self.bit_set.is_empty() { + // SAFETY: We know that `self.is_inline()` is true. 
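The `trailing_zeros()` loop in `next()` above is the standard way to walk the set bits of a word. A standalone sketch of the same technique over a plain `&[u64]` (illustrative only, not the iterator type defined here):

/// Collect the indices of all set bits in `words`, lowest first.
fn set_bits(words: &[u64]) -> Vec<usize> {
    let mut out = Vec::new();
    for (word_idx, &w) in words.iter().enumerate() {
        let mut word = w;
        while word != 0 {
            let bit = word.trailing_zeros() as usize; // lowest remaining set bit
            word ^= 1u64 << bit;                      // clear it, as `next()` does
            out.push(word_idx * 64 + bit);
        }
    }
    out
}

fn main() {
    // Bits 0 and 5 in the first word, bit 3 in the second word (index 67).
    assert_eq!(set_bits(&[0b10_0001, 0b1000]), vec![0, 5, 67]);
}

Clearing the bit that was just reported keeps the loop's invariant that `word` only holds not-yet-visited bits.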
+ let word = unsafe { self.bit_set.inline } ^ DenseBitSet::::IS_INLINE_TAG_BIT; + new_bit_set.on_heap_get_or_alloc().as_mut_slice()[0] = word; + } + self.bit_set = new_bit_set; + } else if self.bit_set.is_empty_unallocated() { + self.bit_set = DenseBitSet::new_empty(min_domain_size); + } else { + self.bit_set.on_heap_mut().unwrap().ensure_capacity(min_domain_size); + } + } + + pub fn new_empty() -> GrowableBitSet { + GrowableBitSet { bit_set: DenseBitSet::new_empty(0) } + } + + pub fn with_capacity(capacity: usize) -> GrowableBitSet { + GrowableBitSet { bit_set: DenseBitSet::new_empty(capacity) } + } + + /// Insert the element with index `idx`. Returns `true` if the set has changed. + #[inline] + pub fn insert_usize(&mut self, idx: usize) -> bool { + self.ensure(idx + 1); + self.bit_set.insert_usize(idx) + } +} + +impl GrowableBitSet { + /// Insert `elem` into the set, resizing if necessary. Returns `true` if the set has changed. + #[inline] + pub fn insert(&mut self, elem: T) -> bool { + self.insert_usize(elem.index()) + } + + /// Returns `true` if the set has changed. + #[inline] + pub fn remove(&mut self, elem: T) -> bool { + self.ensure(elem.index() + 1); + self.bit_set.remove(elem) + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.bit_set.is_empty() + } + + #[inline] + pub fn contains(&self, elem: T) -> bool { + elem.index() < self.bit_set.capacity() && self.bit_set.contains(elem) + } + + #[inline] + pub fn iter(&self) -> BitIter<'_, T> { + self.bit_set.iter() + } + + #[inline] + pub fn len(&self) -> usize { + self.bit_set.count() + } +} + +impl From> for GrowableBitSet { + fn from(bit_set: DenseBitSet) -> Self { + Self { bit_set } + } +} + +impl From> for DenseBitSet { + fn from(bit_set: GrowableBitSet) -> Self { + bit_set.bit_set + } +} + +impl fmt::Debug for GrowableBitSet { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + self.bit_set.fmt(w) + } +} + +#[inline] +fn max_bit(word: Word) -> usize { + WORD_BITS - 1 - word.leading_zeros() as usize +} diff --git a/compiler/rustc_index/src/bit_set/tests.rs b/compiler/rustc_index/src/bit_set/tests.rs index 323a66ddc6f20..eea19cb11a101 100644 --- a/compiler/rustc_index/src/bit_set/tests.rs +++ b/compiler/rustc_index/src/bit_set/tests.rs @@ -1,9 +1,583 @@ +use std::collections::BTreeSet; +use std::hash::{BuildHasher, BuildHasherDefault, DefaultHasher}; +use std::hint::black_box; +use std::ops::{Range, RangeBounds, RangeInclusive}; + +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use test::Bencher; + use super::*; +use crate::IndexVec; extern crate test; -use std::hint::black_box; -use test::Bencher; +/// A very simple pseudo random generator using linear xorshift. +/// +/// [See Wikipedia](https://en.wikipedia.org/wiki/Xorshift). This has 64-bit state and a period +/// of `2^64 - 1`. +struct Rng(u64); + +impl Rng { + fn new(seed: u64) -> Self { + Rng(seed) + } + + fn next(&mut self) -> usize { + self.0 ^= self.0 << 7; + self.0 ^= self.0 >> 9; + self.0 as usize + } + + fn next_bool(&mut self) -> bool { + self.next() % 2 == 0 + } + + /// Sample a range, a subset of `0..=max`. + /// + /// The purpose of this method is to make edge cases such as `0..=max` more common. 
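Taken together, the methods above give `GrowableBitSet` the usual grow-on-insert behaviour. A hedged usage sketch, assuming `rustc_index` is available as a dependency (it is an internal compiler crate, so this is illustrative rather than something external code would normally build):

use rustc_index::bit_set::GrowableBitSet;

fn main() {
    let mut set: GrowableBitSet<usize> = GrowableBitSet::new_empty();
    assert!(set.is_empty());

    assert!(set.insert(3));   // returns true: the set changed
    assert!(!set.insert(3));  // already present, nothing changed

    // Inserting past the current capacity goes through `ensure()` and grows
    // the underlying `DenseBitSet` onto the heap.
    assert!(set.insert(1_000));
    assert!(set.contains(1_000));
    assert_eq!(set.len(), 2);
}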
+ fn sample_range(&mut self, max: usize) -> RangeInclusive { + let start = match self.next() % 3 { + 0 => 0, + 1 => max, + 2 => self.next() % (max + 1), + _ => unreachable!(), + }; + let end = match self.next() % 3 { + 0 => 0, + 1 => max, + 2 => self.next() % (max + 1), + _ => unreachable!(), + }; + RangeInclusive::new(start, end) + } +} + +#[derive(Default)] +struct EncoderLittleEndian { + bytes: Vec, +} + +impl Encoder for EncoderLittleEndian { + fn emit_usize(&mut self, v: usize) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u8(&mut self, v: u8) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u16(&mut self, v: u16) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u32(&mut self, v: u32) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u64(&mut self, v: u64) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u128(&mut self, v: u128) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_isize(&mut self, v: isize) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i8(&mut self, v: i8) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i16(&mut self, v: i16) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i32(&mut self, v: i32) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i64(&mut self, v: i64) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i128(&mut self, v: i128) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_raw_bytes(&mut self, v: &[u8]) { + self.bytes.extend(v); + } +} + +struct DecoderLittleEndian<'a> { + bytes: &'a [u8], + /// Remember the original `bytes.len()` so we can calculate how many bytes we've read. + original_len: usize, +} + +impl<'a> DecoderLittleEndian<'a> { + fn new(bytes: &'a [u8]) -> Self { + Self { bytes, original_len: bytes.len() } + } +} + +impl<'a> Decoder for DecoderLittleEndian<'a> { + fn read_usize(&mut self) -> usize { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + usize::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u128(&mut self) -> u128 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u128::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u64(&mut self) -> u64 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u64::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u32(&mut self) -> u32 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u32::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u16(&mut self) -> u16 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u16::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u8(&mut self) -> u8 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u8::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_isize(&mut self) -> isize { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + isize::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i128(&mut self) -> i128 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i128::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i64(&mut self) -> i64 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i64::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i32(&mut self) -> i32 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i32::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i16(&mut self) -> 
i16 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i16::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_raw_bytes(&mut self, len: usize) -> &[u8] { + let (bytes, rest) = self.bytes.split_at(len); + self.bytes = rest; + bytes + } + fn peek_byte(&self) -> u8 { + self.bytes[0] + } + fn position(&self) -> usize { + self.original_len - self.bytes.len() + } +} + +fn test_with_domain_size(domain_size: usize) { + const TEST_ITERATIONS: u32 = 512; + + let mut set_1 = DenseBitSet::::new_empty(domain_size); + let mut set_1_reference = IndexVec::::from_elem_n(false, domain_size); + let mut set_2 = DenseBitSet::::new_empty(domain_size); + let mut set_2_reference = IndexVec::::from_elem_n(false, domain_size); + + let hasher = BuildHasherDefault::::new(); + + let mut encoder = EncoderLittleEndian::default(); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + // Make a random operation. + match rng.next() % 100 { + 0..20 => { + // Insert in one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to insert into. + if rng.next_bool() { + assert_eq!(!set_1.contains(elem), set_1.insert(elem)); + set_1_reference[elem] = true; + } else { + assert_eq!(!set_2.contains(elem), set_2.insert(elem)); + set_2_reference[elem] = true; + } + } + 20..40 => { + // Insert a range in one of the sets. + if domain_size == 0 { + continue; + } + + let range = rng.sample_range(domain_size - 1); + // Choose set to insert into. + if rng.next_bool() { + set_1.insert_range_inclusive(range.clone()); + for i in range { + set_1_reference[i] = true; + } + } else { + set_2.insert_range_inclusive(range.clone()); + for i in range { + set_2_reference[i] = true; + } + } + } + 40..50 => { + // Test insert_all(). + if rng.next_bool() { + set_1.insert_all(domain_size); + for x in set_1_reference.iter_mut() { + *x = true; + } + } else { + set_2.insert_all(domain_size); + for x in set_2_reference.iter_mut() { + *x = true; + } + } + } + 50..70 => { + // Remove from one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to remove into. + if rng.next_bool() { + assert_eq!(set_1.contains(elem), set_1.remove(elem),); + set_1_reference[elem] = false; + } else { + assert_eq!(set_2.contains(elem), set_2.remove(elem),); + set_2_reference[elem] = false; + } + } + 70..76 => { + // Union + let old_set_1 = set_1.clone(); + let changed = set_1.union(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] |= val; + } + } + 76..82 => { + // Intersection + let old_set_1 = set_1.clone(); + let changed = set_1.intersect(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] &= val; + } + } + 82..88 => { + // Subtraction + let old_set_1 = set_1.clone(); + let changed = set_1.subtract(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] &= !val; + } + } + 88..94 => { + // Union_not + set_1.union_not(&set_2, domain_size); + + // Adjust the reference sets. 
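The loop above is a differential (model-based) test: every random operation is applied both to the set under test and to a trivially correct reference model, and the two are compared after each step. A minimal, self-contained sketch of the same pattern, using a `BTreeSet` as a stand-in for the structure under test and the same xorshift update as `Rng`:

use std::collections::BTreeSet;

fn main() {
    const DOMAIN: usize = 100;
    let mut set = BTreeSet::new();       // stand-in for the set under test
    let mut model = vec![false; DOMAIN]; // trivially correct reference

    let mut state: u64 = 42; // xorshift update, as in `Rng`
    let mut next = || {
        state ^= state << 7;
        state ^= state >> 9;
        state as usize
    };

    for _ in 0..1_000 {
        let elem = next() % DOMAIN;
        if next() % 2 == 0 {
            set.insert(elem);
            model[elem] = true;
        } else {
            set.remove(&elem);
            model[elem] = false;
        }
        // After every operation, the two representations must agree exactly.
        assert!((0..DOMAIN).all(|i| set.contains(&i) == model[i]));
    }
}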
+ for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] |= !val; + } + } + 94..97 => { + // Clear + if rng.next_bool() { + set_1.clear(); + for x in set_1_reference.iter_mut() { + *x = false; + } + } else { + set_2.clear(); + for x in set_2_reference.iter_mut() { + *x = false; + } + } + } + 97..100 => { + // Test new_filled(). + if rng.next_bool() { + set_1 = DenseBitSet::new_filled(domain_size); + for x in set_1_reference.iter_mut() { + *x = true; + } + } else { + set_2 = DenseBitSet::new_filled(domain_size); + for x in set_2_reference.iter_mut() { + *x = true; + } + } + } + _ => unreachable!(), + } + + // Check the contains function. + for i in 0..domain_size { + assert_eq!(set_1.contains(i), set_1_reference[i]); + assert_eq!(set_2.contains(i), set_2_reference[i]); + } + + // Check iter function. + assert!( + set_1.iter().eq(set_1_reference.iter_enumerated().filter(|&(_, &v)| v).map(|(x, _)| x)) + ); + assert!( + set_2.iter().eq(set_2_reference.iter_enumerated().filter(|&(_, &v)| v).map(|(x, _)| x)) + ); + + // Check the superset relation. + assert_eq!(set_1.superset(&set_2), set_2.iter().all(|x| set_1.contains(x))); + + // Check the `==` operator. + assert_eq!(set_1 == set_2, set_1_reference == set_2_reference); + + // Check the `hash()` function. + // If the `set_1` and `set_2` are equal, then their hashes must also be equal. + if set_1 == set_2 { + assert_eq!(hasher.hash_one(&set_1), hasher.hash_one(&set_2)); + } + + // Check the count function. + assert_eq!(set_1.count(), set_1_reference.iter().filter(|&&x| x).count()); + assert_eq!(set_2.count(), set_2_reference.iter().filter(|&&x| x).count()); + + // Check `only_one_elem()`. + if let Some(elem) = set_1.only_one_elem() { + assert_eq!(set_1.count(), 1); + assert_eq!(elem, set_1.iter().next().unwrap()); + } else { + assert_ne!(set_1.count(), 1); + } + + // Check `last_set_in()`. + if domain_size > 0 { + let range = rng.sample_range(domain_size - 1); + assert_eq!( + set_1.last_set_in(range.clone()), + range.clone().filter(|&i| set_1.contains(i)).last() + ); + assert_eq!( + set_2.last_set_in(range.clone()), + range.filter(|&i| set_2.contains(i)).last() + ); + } + + // Check `Encodable` and `Decodable` implementations. + if rng.next() as u32 % TEST_ITERATIONS < 128 { + set_1.encode(&mut encoder); + + let mut decoder = DecoderLittleEndian::new(&encoder.bytes); + let decoded = DenseBitSet::::decode(&mut decoder); + assert_eq!( + decoder.position(), + encoder.bytes.len(), + "All bytes must be read when decoding." + ); + + assert_eq!(set_1, decoded); + + encoder.bytes.clear(); + } + } +} + +fn test_relations_with_chunked_set(domain_size: usize) { + const TEST_ITERATIONS: u32 = 64; + + let mut dense_set = DenseBitSet::::new_empty(domain_size); + let mut chunked_set = ChunkedBitSet::new_empty(domain_size); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + // Make a random operation. + match rng.next() % 10 { + 0..3 => { + // Insert in one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to insert into. + if rng.next_bool() { + dense_set.insert(elem); + } else { + chunked_set.insert(elem); + } + } + 3..6 => { + // Remove from one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to remove into. 
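The hash check above relies on the usual `Eq`/`Hash` contract: equal values must hash equally, while unequal values are still allowed to collide. A small self-contained sketch of that check, using the same `BuildHasherDefault<DefaultHasher>` hasher as the test:

use std::hash::{BuildHasher, BuildHasherDefault, DefaultHasher, Hash};

/// If two values compare equal, their hashes must be equal too; the converse
/// need not hold.
fn check_hash_eq_consistency<T: Hash + PartialEq>(a: &T, b: &T) {
    let hasher = BuildHasherDefault::<DefaultHasher>::default();
    if a == b {
        assert_eq!(hasher.hash_one(a), hasher.hash_one(b));
    }
}

fn main() {
    check_hash_eq_consistency(&vec![1u32, 2, 3], &vec![1u32, 2, 3]);
    check_hash_eq_consistency(&"foo", &"bar"); // unequal: nothing to assert
}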
+ if rng.next_bool() { + dense_set.remove(elem); + } else { + chunked_set.remove(elem); + } + } + 6 => { + // Clear + if rng.next_bool() { + dense_set.clear(); + } else { + chunked_set.clear(); + } + } + 7 => { + // Fill. + if rng.next_bool() { + dense_set.insert_all(domain_size); + } else { + chunked_set.insert_all(); + } + } + 8 => { + // Union + let old_dense_set = dense_set.clone(); + let changed = dense_set.union(&chunked_set); + assert_eq!(old_dense_set != dense_set, changed); + assert!(dense_set.superset(&old_dense_set)); + assert!(chunked_set.iter().all(|x| dense_set.contains(x))); + + // Check that all the added elements come from `chunked_set`. + let mut difference = dense_set.clone(); + difference.subtract(&old_dense_set); + assert!(difference.iter().all(|x| chunked_set.contains(x))); + } + 9 => { + // Intersection + let old_dense_set = dense_set.clone(); + let changed = dense_set.intersect(&chunked_set); + assert_eq!(old_dense_set != dense_set, changed); + assert!(old_dense_set.superset(&dense_set)); + assert!(dense_set.iter().all(|x| chunked_set.contains(x))); + + // Check that no of the removed elements comes from `chunked_set`. + let mut difference = old_dense_set; // Just renaming. + difference.subtract(&dense_set); + assert!(difference.iter().all(|x| !chunked_set.contains(x))); + } + _ => unreachable!(), + } + } +} + +#[test] +fn test_dense_bit_set() { + assert_eq!( + size_of::>(), + size_of::(), + "DenseBitSet should have the same size as a Word" + ); + + test_with_domain_size(0); + test_with_domain_size(1); + test_with_domain_size(63); + test_with_domain_size(64); + test_with_domain_size(65); + test_with_domain_size(127); + test_with_domain_size(128); + test_with_domain_size(129); + + test_relations_with_chunked_set(0); + test_relations_with_chunked_set(1); + test_relations_with_chunked_set(CHUNK_BITS - 1); + test_relations_with_chunked_set(CHUNK_BITS); + test_relations_with_chunked_set(CHUNK_BITS + 2); + test_relations_with_chunked_set(3 * CHUNK_BITS - 2); + test_relations_with_chunked_set(3 * CHUNK_BITS); + test_relations_with_chunked_set(3 * CHUNK_BITS + 1); +} + +#[test] +fn test_growable_bit_set() { + const TEST_ITERATIONS: u32 = 512; + const MAX_ELEMS: usize = 314; + + let mut set = GrowableBitSet::::new_empty(); + let mut reference_set = BTreeSet::::new(); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + match rng.next() % 100 { + 0..30 => { + // Insert an element in the `0..=(DenseBitSet::INLINE_CAPACITY + 2)` range. + let elem = rng.next() % (DenseBitSet::::INLINE_CAPACITY + 3); + set.insert(elem); + reference_set.insert(elem); + } + 30..50 => { + // Insert an element in the `0..MAX_ELEMS` range. + let elem = rng.next() % MAX_ELEMS; + set.insert(elem); + reference_set.insert(elem); + } + 50..70 => { + // Remove an existing element. + let len = set.len(); + if len == 0 { + continue; + } + let elem = set.iter().nth(rng.next() % len).unwrap(); + set.remove(elem); + reference_set.remove(&elem); + } + 70..90 => { + // Remove an arbitrary element in the `0..MAX_ELEMS` range. + let elem = rng.next() % MAX_ELEMS; + set.remove(elem); + reference_set.remove(&elem); + } + 90..100 => { + // Make sure the `with_capacity()` function works. + let capacity = rng.next() % MAX_ELEMS; + set = GrowableBitSet::with_capacity(capacity); + reference_set.clear(); + } + _ => unreachable!(), + } + + // Check the `is_empty()` function. + assert_eq!(set.is_empty(), reference_set.is_empty()); + + // Check the `iter` function. 
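The `size_of` assertion in `test_dense_bit_set` above pins down the point of the new representation: one tagged `usize`, with the index type living only in a `PhantomData`. A standalone sketch of why such a layout stays word-sized (`TaggedWordSet` is a made-up stand-in, not the real type):

use std::marker::PhantomData;
use std::mem::size_of;

/// A made-up stand-in: one `usize` of storage plus a zero-sized index marker.
struct TaggedWordSet<T> {
    _repr: usize,            // inline bits or a tagged pointer
    _marker: PhantomData<T>, // zero-sized, so it does not affect the layout
}

fn main() {
    assert_eq!(size_of::<TaggedWordSet<u32>>(), size_of::<usize>());
}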
+ assert!(set.iter().eq(reference_set.iter().copied())); + + // Check the contains function with a 20 % probability. + if rng.next() % 5 == 0 { + for x in 0..MAX_ELEMS { + assert_eq!(set.contains(x), reference_set.contains(&x)); + } + } + } +} #[test] fn test_new_filled() { @@ -50,11 +624,11 @@ fn bitset_clone_from() { let mut b = DenseBitSet::new_empty(2); b.clone_from(&a); - assert_eq!(b.domain_size(), 10); + assert!(b.capacity() >= 10); assert_eq!(b.iter().collect::>(), [4, 7, 9]); b.clone_from(&DenseBitSet::new_empty(40)); - assert_eq!(b.domain_size(), 40); + assert!(b.capacity() >= 40); assert_eq!(b.iter().collect::>(), []); } @@ -91,7 +665,7 @@ fn union_not() { b.insert(81); // Already in `a`. b.insert(90); - a.union_not(&b); + a.union_not(&b, 100); // After union-not, `a` should contain all values in the domain, except for // the ones that are in `b` and were _not_ already in `a`. @@ -600,10 +1174,7 @@ fn sparse_matrix_operations() { #[test] fn dense_insert_range() { #[track_caller] - fn check(domain: usize, range: R) - where - R: RangeBounds + Clone + IntoIterator + std::fmt::Debug, - { + fn check_range(domain: usize, range: Range) { let mut set = DenseBitSet::new_empty(domain); set.insert_range(range.clone()); for i in set.iter() { @@ -613,32 +1184,45 @@ fn dense_insert_range() { assert!(set.contains(i), "{} in {:?}, inserted {:?}", i, set, range); } } - check(300, 10..10); - check(300, WORD_BITS..WORD_BITS * 2); - check(300, WORD_BITS - 1..WORD_BITS * 2); - check(300, WORD_BITS - 1..WORD_BITS); - check(300, 10..100); - check(300, 10..30); - check(300, 0..5); - check(300, 0..250); - check(300, 200..250); - - check(300, 10..=10); - check(300, WORD_BITS..=WORD_BITS * 2); - check(300, WORD_BITS - 1..=WORD_BITS * 2); - check(300, WORD_BITS - 1..=WORD_BITS); - check(300, 10..=100); - check(300, 10..=30); - check(300, 0..=5); - check(300, 0..=250); - check(300, 200..=250); + + #[track_caller] + fn check_range_inclusive(domain: usize, range: RangeInclusive) { + let mut set = DenseBitSet::new_empty(domain); + set.insert_range_inclusive(range.clone()); + for i in set.iter() { + assert!(range.contains(&i)); + } + for i in range.clone() { + assert!(set.contains(i), "{} in {:?}, inserted {:?}", i, set, range); + } + } + + check_range(300, 10..10); + check_range(300, WORD_BITS..WORD_BITS * 2); + check_range(300, WORD_BITS - 1..WORD_BITS * 2); + check_range(300, WORD_BITS - 1..WORD_BITS); + check_range(300, 10..100); + check_range(300, 10..30); + check_range(300, 0..5); + check_range(300, 0..250); + check_range(300, 200..250); + + check_range_inclusive(300, 10..=10); + check_range_inclusive(300, WORD_BITS..=WORD_BITS * 2); + check_range_inclusive(300, WORD_BITS - 1..=WORD_BITS * 2); + check_range_inclusive(300, WORD_BITS - 1..=WORD_BITS); + check_range_inclusive(300, 10..=100); + check_range_inclusive(300, 10..=30); + check_range_inclusive(300, 0..=5); + check_range_inclusive(300, 0..=250); + check_range_inclusive(300, 200..=250); for i in 0..WORD_BITS * 2 { for j in i..WORD_BITS * 2 { - check(WORD_BITS * 2, i..j); - check(WORD_BITS * 2, i..=j); - check(300, i..j); - check(300, i..=j); + check_range(WORD_BITS * 2, i..j); + check_range_inclusive(WORD_BITS * 2, i..=j); + check_range(300, i..j); + check_range_inclusive(300, i..=j); } } } @@ -656,7 +1240,7 @@ fn dense_last_set_before() { } #[track_caller] - fn cmp(set: &DenseBitSet, needle: impl RangeBounds + Clone + std::fmt::Debug) { + fn cmp(set: &DenseBitSet, needle: RangeInclusive) { assert_eq!( set.last_set_in(needle.clone()), easy(set, 
needle.clone()), @@ -672,20 +1256,18 @@ fn dense_last_set_before() { set.insert(WORD_BITS - 1); cmp(&set, 0..=WORD_BITS - 1); cmp(&set, 0..=5); - cmp(&set, 10..100); + cmp(&set, 10..=99); set.insert(100); - cmp(&set, 100..110); - cmp(&set, 99..100); + cmp(&set, 100..=119); + cmp(&set, 99..=99); cmp(&set, 99..=100); for i in 0..=WORD_BITS * 2 { for j in i..=WORD_BITS * 2 { for k in 0..WORD_BITS * 2 { let mut set = DenseBitSet::new_empty(300); - cmp(&set, i..j); cmp(&set, i..=j); set.insert(k); - cmp(&set, i..j); cmp(&set, i..=j); } } diff --git a/compiler/rustc_middle/src/values.rs b/compiler/rustc_middle/src/values.rs index 4d70a70873267..46bcc25dc0710 100644 --- a/compiler/rustc_middle/src/values.rs +++ b/compiler/rustc_middle/src/values.rs @@ -376,12 +376,8 @@ fn find_item_ty_spans( }); if check_params && let Some(args) = path.segments.last().unwrap().args { let params_in_repr = tcx.params_in_repr(def_id); - // the domain size check is needed because the HIR may not be well-formed at this point - for (i, arg) in args.args.iter().enumerate().take(params_in_repr.domain_size()) - { - if let hir::GenericArg::Type(ty) = arg - && params_in_repr.contains(i as u32) - { + for arg in params_in_repr.iter().map_while(|i| args.args.get(i as usize)) { + if let hir::GenericArg::Type(ty) = arg { find_item_ty_spans( tcx, ty.as_unambig_ty(), diff --git a/compiler/rustc_mir_dataflow/src/framework/cursor.rs b/compiler/rustc_mir_dataflow/src/framework/cursor.rs index 3f6e7a0661921..d45509d9de758 100644 --- a/compiler/rustc_mir_dataflow/src/framework/cursor.rs +++ b/compiler/rustc_mir_dataflow/src/framework/cursor.rs @@ -127,7 +127,7 @@ where #[cfg(test)] pub(crate) fn allow_unreachable(&mut self) { #[cfg(debug_assertions)] - self.reachable_blocks.insert_all() + self.reachable_blocks.insert_all(self.body().basic_blocks.len()) } /// Returns the `Analysis` used to generate the underlying `Results`. 
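The `values.rs` hunk above replaces an explicit `domain_size()` bound with `map_while`, which walks the set's ascending indices and stops at the first one that has no corresponding HIR argument. A self-contained sketch of that pattern with plain slices (all names here are invented for illustration):

fn main() {
    let params_in_repr = [0usize, 2, 5, 9];    // stand-in for the bit set's iter()
    let args = ["a", "b", "c", "d", "e", "f"]; // only indices 0..6 exist

    let touched: Vec<&str> = params_in_repr
        .iter()
        .map_while(|&i| args.get(i).copied())
        .collect();

    // Index 5 is still in range, index 9 is not, so iteration stops there.
    assert_eq!(touched, ["a", "c", "f"]);
}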
diff --git a/compiler/rustc_mir_dataflow/src/framework/fmt.rs b/compiler/rustc_mir_dataflow/src/framework/fmt.rs index 38599cd094933..8a5d3c35f2f52 100644 --- a/compiler/rustc_mir_dataflow/src/framework/fmt.rs +++ b/compiler/rustc_mir_dataflow/src/framework/fmt.rs @@ -82,21 +82,12 @@ where } fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let size = self.domain_size(); - assert_eq!(size, old.domain_size()); - - let mut set_in_self = MixedBitSet::new_empty(size); - let mut cleared_in_self = MixedBitSet::new_empty(size); - - for i in (0..size).map(T::new) { - match (self.contains(i), old.contains(i)) { - (true, false) => set_in_self.insert(i), - (false, true) => cleared_in_self.insert(i), - _ => continue, - }; - } + let mut set_in_self = self.clone(); + set_in_self.subtract(old); + let mut cleared_in_self = old.clone(); + cleared_in_self.subtract(self); - fmt_diff(&set_in_self, &cleared_in_self, ctxt, f) + fmt_diff(&MixedBitSet::Small(set_in_self), &MixedBitSet::Small(cleared_in_self), ctxt, f) } } diff --git a/compiler/rustc_mir_dataflow/src/impls/initialized.rs b/compiler/rustc_mir_dataflow/src/impls/initialized.rs index 18165b0b9bd08..4f77deb8526d2 100644 --- a/compiler/rustc_mir_dataflow/src/impls/initialized.rs +++ b/compiler/rustc_mir_dataflow/src/impls/initialized.rs @@ -464,7 +464,7 @@ impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> { // sets state bits for Arg places fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) { // set all bits to 1 (uninit) before gathering counter-evidence - state.insert_all(); + state.insert_all(self.move_data().move_paths.len()); drop_flag_effects_for_function_entry(self.body, self.move_data, |path, s| { assert!(s == DropFlagState::Present); diff --git a/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs b/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs index e3aa8f5a62014..896b86156c8a2 100644 --- a/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs +++ b/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs @@ -87,7 +87,6 @@ impl<'a, 'tcx> Analysis<'tcx> for MaybeStorageDead<'a> { } fn initialize_start_block(&self, body: &Body<'tcx>, state: &mut Self::Domain) { - assert_eq!(body.local_decls.len(), self.always_live_locals.domain_size()); // Do not iterate on return place and args, as they are trivially always live. 
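The rewritten `fmt_diff_with` above computes the two halves of a diff purely with set subtraction: bits set in `self` but not in `old`, and bits set in `old` but not in `self`. The same computation on a single word, as a sketch:

fn main() {
    let old: u64 = 0b1011;
    let new: u64 = 0b1110;

    let set_in_new = new & !old;     // became 1: bit 2
    let cleared_in_new = old & !new; // became 0: bit 0

    assert_eq!(set_in_new, 0b0100);
    assert_eq!(cleared_in_new, 0b0001);
}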
for local in body.vars_and_temps_iter() { if !self.always_live_locals.contains(local) { diff --git a/compiler/rustc_mir_transform/src/copy_prop.rs b/compiler/rustc_mir_transform/src/copy_prop.rs index 27af5818982d0..42cac41d8cb6d 100644 --- a/compiler/rustc_mir_transform/src/copy_prop.rs +++ b/compiler/rustc_mir_transform/src/copy_prop.rs @@ -34,7 +34,7 @@ impl<'tcx> crate::MirPass<'tcx> for CopyProp { let fully_moved = fully_moved_locals(&ssa, body); debug!(?fully_moved); - let mut storage_to_remove = DenseBitSet::new_empty(fully_moved.domain_size()); + let mut storage_to_remove = DenseBitSet::new_empty(body.local_decls.len()); for (local, &head) in ssa.copy_classes().iter_enumerated() { if local != head { storage_to_remove.insert(head); diff --git a/compiler/rustc_mir_transform/src/coroutine.rs b/compiler/rustc_mir_transform/src/coroutine.rs index cddb2f8477858..1b58b7dedf739 100644 --- a/compiler/rustc_mir_transform/src/coroutine.rs +++ b/compiler/rustc_mir_transform/src/coroutine.rs @@ -211,6 +211,9 @@ struct TransformVisitor<'tcx> { old_yield_ty: Ty<'tcx>, old_ret_ty: Ty<'tcx>, + + /// The number of locals in the [`Body`]. + n_locals: usize, } impl<'tcx> TransformVisitor<'tcx> { @@ -440,7 +443,7 @@ impl<'tcx> MutVisitor<'tcx> for TransformVisitor<'tcx> { let storage_liveness: GrowableBitSet = self.storage_liveness[block].clone().unwrap().into(); - for i in 0..self.always_live_locals.domain_size() { + for i in 0..self.n_locals { let l = Local::new(i); let needs_storage_dead = storage_liveness.contains(l) && !self.remap.contains(l) @@ -845,8 +848,6 @@ fn compute_storage_conflicts<'mir, 'tcx>( analysis: &mut MaybeRequiresStorage<'mir, 'tcx>, results: &Results>, ) -> BitMatrix { - assert_eq!(body.local_decls.len(), saved_locals.domain_size()); - debug!("compute_storage_conflicts({:?})", body.span); debug!("always_live = {:?}", always_live_locals); @@ -859,7 +860,11 @@ fn compute_storage_conflicts<'mir, 'tcx>( let mut visitor = StorageConflictVisitor { body, saved_locals, - local_conflicts: BitMatrix::from_row_n(&ineligible_locals, body.local_decls.len()), + local_conflicts: BitMatrix::from_row_n( + &ineligible_locals, + body.local_decls.len(), + body.local_decls.len(), + ), eligible_storage_live: DenseBitSet::new_empty(body.local_decls.len()), }; @@ -1010,7 +1015,7 @@ fn compute_layout<'tcx>( // Create a map from local indices to coroutine struct indices. 
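A recurring shape in these hunks: because the set no longer carries a `domain_size()`, each caller now supplies the size it already knows (`body.local_decls.len()`, the new `n_locals` field, an explicit column count for `BitMatrix::from_row_n`). A small sketch of that ownership pattern; `LivenessInfo` is a hypothetical struct invented purely for illustration:

struct LivenessInfo {
    live: Vec<u64>,  // stand-in for the bit set's words
    n_locals: usize, // domain size, tracked by the owner instead of the set
}

impl LivenessInfo {
    fn new(n_locals: usize) -> Self {
        Self { live: vec![0; n_locals.div_ceil(64)], n_locals }
    }

    /// Iterate over the whole domain, something the set alone can no longer bound.
    fn all_locals(&self) -> impl Iterator<Item = usize> {
        0..self.n_locals
    }
}

fn main() {
    let info = LivenessInfo::new(130);
    assert_eq!(info.live.len(), 3);
    assert_eq!(info.all_locals().count(), 130);
}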
let mut variant_fields: IndexVec> = iter::repeat(IndexVec::new()).take(CoroutineArgs::RESERVED_VARIANTS).collect(); - let mut remap = IndexVec::from_elem_n(None, saved_locals.domain_size()); + let mut remap = IndexVec::from_elem_n(None, body.local_decls.len()); for (suspension_point_idx, live_locals) in live_locals_at_suspension_points.iter().enumerate() { let variant_index = VariantIdx::from(CoroutineArgs::RESERVED_VARIANTS + suspension_point_idx); @@ -1585,6 +1590,7 @@ impl<'tcx> crate::MirPass<'tcx> for StateTransform { discr_ty, old_ret_ty, old_yield_ty, + n_locals: body.local_decls.len(), }; transform.visit_body(body); diff --git a/compiler/rustc_mir_transform/src/coverage/counters.rs b/compiler/rustc_mir_transform/src/coverage/counters.rs index 5568d42ab8f3c..c226f9b89ceb8 100644 --- a/compiler/rustc_mir_transform/src/coverage/counters.rs +++ b/compiler/rustc_mir_transform/src/coverage/counters.rs @@ -81,8 +81,9 @@ pub(crate) fn transcribe_counters( old: &NodeCounters, bcb_needs_counter: &DenseBitSet, bcbs_seen: &DenseBitSet, + num_bcbs: usize, ) -> CoverageCounters { - let mut new = CoverageCounters::with_num_bcbs(bcb_needs_counter.domain_size()); + let mut new = CoverageCounters::with_num_bcbs(num_bcbs); for bcb in bcb_needs_counter.iter() { if !bcbs_seen.contains(bcb) { diff --git a/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs b/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs index 4c20722a04347..e31cec383cfe4 100644 --- a/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs +++ b/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs @@ -72,7 +72,7 @@ impl BalancedFlowGraph { // Next, find all nodes that are currently not reverse-reachable from // `sink_edge_nodes`, and add them to the set as well. dfs.complete_search(); - sink_edge_nodes.union_not(dfs.visited_set()); + sink_edge_nodes.union_not(dfs.visited_set(), graph.num_nodes()); // The sink node is 1 higher than the highest real node. let sink = G::Node::new(graph.num_nodes()); diff --git a/compiler/rustc_mir_transform/src/coverage/query.rs b/compiler/rustc_mir_transform/src/coverage/query.rs index ccf76dc710874..003082d396d1b 100644 --- a/compiler/rustc_mir_transform/src/coverage/query.rs +++ b/compiler/rustc_mir_transform/src/coverage/query.rs @@ -136,7 +136,12 @@ fn coverage_ids_info<'tcx>( priority_list[1..].sort_by_key(|&bcb| !bcbs_seen.contains(bcb)); let node_counters = make_node_counters(&fn_cov_info.node_flow_data, &priority_list); - let coverage_counters = transcribe_counters(&node_counters, &bcb_needs_counter, &bcbs_seen); + let coverage_counters = transcribe_counters( + &node_counters, + &bcb_needs_counter, + &bcbs_seen, + fn_cov_info.priority_list.len(), + ); let CoverageCounters { phys_counter_for_node, next_counter_id, node_counters, expressions, .. diff --git a/compiler/rustc_mir_transform/src/deduce_param_attrs.rs b/compiler/rustc_mir_transform/src/deduce_param_attrs.rs index a0db8bdb7ed88..b2f077d5206fc 100644 --- a/compiler/rustc_mir_transform/src/deduce_param_attrs.rs +++ b/compiler/rustc_mir_transform/src/deduce_param_attrs.rs @@ -19,19 +19,20 @@ struct DeduceReadOnly { /// 1). The bit is true if the argument may have been mutated or false if we know it hasn't /// been up to the point we're at. mutable_args: DenseBitSet, + arg_count: usize, } impl DeduceReadOnly { /// Returns a new DeduceReadOnly instance. 
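`union_not` now also takes the domain size explicitly, because negating the other set would otherwise turn on bits past the end of the domain in the final word. A standalone sketch of the per-word computation and the final-word masking, written against plain `u64` slices (not the patch's implementation):

fn union_not(a: &mut [u64], b: &[u64], domain_size: usize) {
    for (a_word, b_word) in a.iter_mut().zip(b) {
        *a_word |= !*b_word;
    }
    // Clear the excess bits in the final word so they stay outside the domain.
    let used = domain_size % 64;
    if used != 0 {
        if let Some(last) = a.last_mut() {
            *last &= (1u64 << used) - 1;
        }
    }
}

fn main() {
    let mut a = [0b0001u64];
    let b = [0b0110u64];
    union_not(&mut a, &b, 4); // domain is 4 bits: indices 0..4
    assert_eq!(a[0], 0b1001); // a | !b, restricted to the low 4 bits
}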
fn new(arg_count: usize) -> Self { - Self { mutable_args: DenseBitSet::new_empty(arg_count) } + Self { mutable_args: DenseBitSet::new_empty(arg_count), arg_count } } } impl<'tcx> Visitor<'tcx> for DeduceReadOnly { fn visit_place(&mut self, place: &Place<'tcx>, context: PlaceContext, _location: Location) { // We're only interested in arguments. - if place.local == RETURN_PLACE || place.local.index() > self.mutable_args.domain_size() { + if place.local == RETURN_PLACE || place.local.index() > self.arg_count { return; } @@ -86,7 +87,7 @@ impl<'tcx> Visitor<'tcx> for DeduceReadOnly { let local = place.local; if place.is_indirect() || local == RETURN_PLACE - || local.index() > self.mutable_args.domain_size() + || local.index() > self.arg_count { continue; } diff --git a/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs b/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs index 75f351f05c30e..0d4bfd2f78b8a 100644 --- a/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs +++ b/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs @@ -274,7 +274,7 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body< // We shall now exclude some local bindings for the following cases. { - let mut to_exclude = MixedBitSet::new_empty(all_locals_dropped.domain_size()); + let mut to_exclude = MixedBitSet::new_empty(move_data.move_paths.len()); // We will now do subtraction from the candidate dropped locals, because of the // following reasons. for path_idx in all_locals_dropped.iter() { diff --git a/compiler/rustc_mir_transform/src/single_use_consts.rs b/compiler/rustc_mir_transform/src/single_use_consts.rs index 02caa92ad3fc8..d88f8607f8484 100644 --- a/compiler/rustc_mir_transform/src/single_use_consts.rs +++ b/compiler/rustc_mir_transform/src/single_use_consts.rs @@ -33,7 +33,9 @@ impl<'tcx> crate::MirPass<'tcx> for SingleUseConsts { locals_in_debug_info: DenseBitSet::new_empty(body.local_decls.len()), }; - finder.ineligible_locals.insert_range(..=Local::from_usize(body.arg_count)); + finder + .ineligible_locals + .insert_range_inclusive(Local::from_usize(0)..=Local::from_usize(body.arg_count)); finder.visit_body(body); diff --git a/compiler/rustc_mir_transform/src/sroa.rs b/compiler/rustc_mir_transform/src/sroa.rs index 7c6ccc89c4f30..c390c993c5aed 100644 --- a/compiler/rustc_mir_transform/src/sroa.rs +++ b/compiler/rustc_mir_transform/src/sroa.rs @@ -103,7 +103,7 @@ fn escaping_locals<'tcx>( }; let mut set = DenseBitSet::new_empty(body.local_decls.len()); - set.insert_range(RETURN_PLACE..=Local::from_usize(body.arg_count)); + set.insert_range_inclusive(RETURN_PLACE..=Local::from_usize(body.arg_count)); for (local, decl) in body.local_decls().iter_enumerated() { if excluded.contains(local) || is_excluded_ty(decl.ty) { set.insert(local);
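The two `insert_range_inclusive` call sites above mark the return place (local 0) through the last argument in a single inclusive range. A trivial sketch of the same bookkeeping over plain `usize` indices and a `Vec<bool>` (purely illustrative):

use std::ops::RangeInclusive;

fn insert_range_inclusive(set: &mut Vec<bool>, range: RangeInclusive<usize>) {
    for i in range {
        set[i] = true;
    }
}

fn main() {
    let arg_count = 3;
    let mut ineligible = vec![false; 10];
    // Return place is local 0; arguments are locals 1..=arg_count.
    insert_range_inclusive(&mut ineligible, 0..=arg_count);
    assert_eq!(&ineligible[..5], &[true, true, true, true, false]);
}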