diff --git a/Cargo.lock b/Cargo.lock index 99cb71cd0ac87..d398c1d5637b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3855,6 +3855,7 @@ dependencies = [ name = "rustc_index" version = "0.0.0" dependencies = [ + "itertools", "rustc_index_macros", "rustc_macros", "rustc_serialize", diff --git a/compiler/rustc_abi/src/layout/coroutine.rs b/compiler/rustc_abi/src/layout/coroutine.rs index 27e704d538c83..73564705686f0 100644 --- a/compiler/rustc_abi/src/layout/coroutine.rs +++ b/compiler/rustc_abi/src/layout/coroutine.rs @@ -120,7 +120,7 @@ fn coroutine_saved_local_eligibility>(t: &T) -> Hash128 { h.finish() } -// Check that bit set hash includes the domain size. -#[test] -fn test_hash_bit_set() { - use rustc_index::bit_set::DenseBitSet; - let a: DenseBitSet = DenseBitSet::new_empty(1); - let b: DenseBitSet = DenseBitSet::new_empty(2); - assert_ne!(a, b); - assert_ne!(hash(&a), hash(&b)); -} - // Check that bit matrix hash includes the matrix dimensions. #[test] fn test_hash_bit_matrix() { diff --git a/compiler/rustc_index/Cargo.toml b/compiler/rustc_index/Cargo.toml index 3d83a3c98daf8..9aa24e668b6b7 100644 --- a/compiler/rustc_index/Cargo.toml +++ b/compiler/rustc_index/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start +itertools = "0.12" rustc_index_macros = { path = "../rustc_index_macros" } rustc_macros = { path = "../rustc_macros", optional = true } rustc_serialize = { path = "../rustc_serialize", optional = true } diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs index 07934389158e5..e5588e90f742e 100644 --- a/compiler/rustc_index/src/bit_set.rs +++ b/compiler/rustc_index/src/bit_set.rs @@ -1,11 +1,13 @@ +mod dense_bit_set; use std::marker::PhantomData; #[cfg(not(feature = "nightly"))] use std::mem; -use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Bound, Not, Range, RangeBounds, Shl}; +use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Not, Range, Shl}; use std::rc::Rc; -use std::{fmt, iter, slice}; +use std::{fmt, iter}; use Chunk::*; +pub use dense_bit_set::{BitIter, DenseBitSet, GrowableBitSet}; #[cfg(feature = "nightly")] use rustc_macros::{Decodable_NoContext, Encodable_NoContext}; use smallvec::{SmallVec, smallvec}; @@ -43,29 +45,6 @@ pub trait BitRelations { fn intersect(&mut self, other: &Rhs) -> bool; } -#[inline] -fn inclusive_start_end( - range: impl RangeBounds, - domain: usize, -) -> Option<(usize, usize)> { - // Both start and end are inclusive. - let start = match range.start_bound().cloned() { - Bound::Included(start) => start.index(), - Bound::Excluded(start) => start.index() + 1, - Bound::Unbounded => 0, - }; - let end = match range.end_bound().cloned() { - Bound::Included(end) => end.index(), - Bound::Excluded(end) => end.index().checked_sub(1)?, - Bound::Unbounded => domain - 1, - }; - assert!(end < domain); - if start > end { - return None; - } - Some((start, end)) -} - macro_rules! bit_relations_inherent_impls { () => { /// Sets `self = self | other` and returns `true` if `self` changed @@ -96,345 +75,7 @@ macro_rules! bit_relations_inherent_impls { } }; } - -/// A fixed-size bitset type with a dense representation. -/// -/// Note 1: Since this bitset is dense, if your domain is big, and/or relatively -/// homogeneous (for example, with long runs of bits set or unset), then it may -/// be preferable to instead use a [MixedBitSet], or an -/// [IntervalSet](crate::interval::IntervalSet). They should be more suited to -/// sparse, or highly-compressible, domains. 
-/// -/// Note 2: Use [`GrowableBitSet`] if you need support for resizing after creation. -/// -/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also -/// just be `usize`. -/// -/// All operations that involve an element will panic if the element is equal -/// to or greater than the domain size. All operations that involve two bitsets -/// will panic if the bitsets have differing domain sizes. -/// -#[cfg_attr(feature = "nightly", derive(Decodable_NoContext, Encodable_NoContext))] -#[derive(Eq, PartialEq, Hash)] -pub struct DenseBitSet { - domain_size: usize, - words: SmallVec<[Word; 2]>, - marker: PhantomData, -} - -impl DenseBitSet { - /// Gets the domain size. - pub fn domain_size(&self) -> usize { - self.domain_size - } -} - -impl DenseBitSet { - /// Creates a new, empty bitset with a given `domain_size`. - #[inline] - pub fn new_empty(domain_size: usize) -> DenseBitSet { - let num_words = num_words(domain_size); - DenseBitSet { domain_size, words: smallvec![0; num_words], marker: PhantomData } - } - - /// Creates a new, filled bitset with a given `domain_size`. - #[inline] - pub fn new_filled(domain_size: usize) -> DenseBitSet { - let num_words = num_words(domain_size); - let mut result = - DenseBitSet { domain_size, words: smallvec![!0; num_words], marker: PhantomData }; - result.clear_excess_bits(); - result - } - - /// Clear all elements. - #[inline] - pub fn clear(&mut self) { - self.words.fill(0); - } - - /// Clear excess bits in the final word. - fn clear_excess_bits(&mut self) { - clear_excess_bits_in_final_word(self.domain_size, &mut self.words); - } - - /// Count the number of set bits in the set. - pub fn count(&self) -> usize { - self.words.iter().map(|e| e.count_ones() as usize).sum() - } - - /// Returns `true` if `self` contains `elem`. - #[inline] - pub fn contains(&self, elem: T) -> bool { - assert!(elem.index() < self.domain_size); - let (word_index, mask) = word_index_and_mask(elem); - (self.words[word_index] & mask) != 0 - } - - /// Is `self` is a (non-strict) superset of `other`? - #[inline] - pub fn superset(&self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - self.words.iter().zip(&other.words).all(|(a, b)| (a & b) == *b) - } - - /// Is the set empty? - #[inline] - pub fn is_empty(&self) -> bool { - self.words.iter().all(|a| *a == 0) - } - - /// Insert `elem`. Returns whether the set has changed. - #[inline] - pub fn insert(&mut self, elem: T) -> bool { - assert!( - elem.index() < self.domain_size, - "inserting element at index {} but domain size is {}", - elem.index(), - self.domain_size, - ); - let (word_index, mask) = word_index_and_mask(elem); - let word_ref = &mut self.words[word_index]; - let word = *word_ref; - let new_word = word | mask; - *word_ref = new_word; - new_word != word - } - - #[inline] - pub fn insert_range(&mut self, elems: impl RangeBounds) { - let Some((start, end)) = inclusive_start_end(elems, self.domain_size) else { - return; - }; - - let (start_word_index, start_mask) = word_index_and_mask(start); - let (end_word_index, end_mask) = word_index_and_mask(end); - - // Set all words in between start and end (exclusively of both). - for word_index in (start_word_index + 1)..end_word_index { - self.words[word_index] = !0; - } - - if start_word_index != end_word_index { - // Start and end are in different words, so we handle each in turn. - // - // We set all leading bits. This includes the start_mask bit. 
- self.words[start_word_index] |= !(start_mask - 1); - // And all trailing bits (i.e. from 0..=end) in the end word, - // including the end. - self.words[end_word_index] |= end_mask | (end_mask - 1); - } else { - self.words[start_word_index] |= end_mask | (end_mask - start_mask); - } - } - - /// Sets all bits to true. - pub fn insert_all(&mut self) { - self.words.fill(!0); - self.clear_excess_bits(); - } - - /// Returns `true` if the set has changed. - #[inline] - pub fn remove(&mut self, elem: T) -> bool { - assert!(elem.index() < self.domain_size); - let (word_index, mask) = word_index_and_mask(elem); - let word_ref = &mut self.words[word_index]; - let word = *word_ref; - let new_word = word & !mask; - *word_ref = new_word; - new_word != word - } - - /// Iterates over the indices of set bits in a sorted order. - #[inline] - pub fn iter(&self) -> BitIter<'_, T> { - BitIter::new(&self.words) - } - - pub fn last_set_in(&self, range: impl RangeBounds) -> Option { - let (start, end) = inclusive_start_end(range, self.domain_size)?; - let (start_word_index, _) = word_index_and_mask(start); - let (end_word_index, end_mask) = word_index_and_mask(end); - - let end_word = self.words[end_word_index] & (end_mask | (end_mask - 1)); - if end_word != 0 { - let pos = max_bit(end_word) + WORD_BITS * end_word_index; - if start <= pos { - return Some(T::new(pos)); - } - } - - // We exclude end_word_index from the range here, because we don't want - // to limit ourselves to *just* the last word: the bits set it in may be - // after `end`, so it may not work out. - if let Some(offset) = - self.words[start_word_index..end_word_index].iter().rposition(|&w| w != 0) - { - let word_idx = start_word_index + offset; - let start_word = self.words[word_idx]; - let pos = max_bit(start_word) + WORD_BITS * word_idx; - if start <= pos { - return Some(T::new(pos)); - } - } - - None - } - - bit_relations_inherent_impls! {} - - /// Sets `self = self | !other`. - /// - /// FIXME: Incorporate this into [`BitRelations`] and fill out - /// implementations for other bitset types, if needed. - pub fn union_not(&mut self, other: &DenseBitSet) { - assert_eq!(self.domain_size, other.domain_size); - - // FIXME(Zalathar): If we were to forcibly _set_ all excess bits before - // the bitwise update, and then clear them again afterwards, we could - // quickly and accurately detect whether the update changed anything. - // But that's only worth doing if there's an actual use-case. - - bitwise(&mut self.words, &other.words, |a, b| a | !b); - // The bitwise update `a | !b` can result in the last word containing - // out-of-domain bits, so we need to clear them. 
- self.clear_excess_bits(); - } -} - -// dense REL dense -impl BitRelations> for DenseBitSet { - fn union(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a | b) - } - - fn subtract(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a & !b) - } - - fn intersect(&mut self, other: &DenseBitSet) -> bool { - assert_eq!(self.domain_size, other.domain_size); - bitwise(&mut self.words, &other.words, |a, b| a & b) - } -} - -impl From> for DenseBitSet { - fn from(bit_set: GrowableBitSet) -> Self { - bit_set.bit_set - } -} - -impl Clone for DenseBitSet { - fn clone(&self) -> Self { - DenseBitSet { - domain_size: self.domain_size, - words: self.words.clone(), - marker: PhantomData, - } - } - - fn clone_from(&mut self, from: &Self) { - self.domain_size = from.domain_size; - self.words.clone_from(&from.words); - } -} - -impl fmt::Debug for DenseBitSet { - fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { - w.debug_list().entries(self.iter()).finish() - } -} - -impl ToString for DenseBitSet { - fn to_string(&self) -> String { - let mut result = String::new(); - let mut sep = '['; - - // Note: this is a little endian printout of bytes. - - // i tracks how many bits we have printed so far. - let mut i = 0; - for word in &self.words { - let mut word = *word; - for _ in 0..WORD_BYTES { - // for each byte in `word`: - let remain = self.domain_size - i; - // If less than a byte remains, then mask just that many bits. - let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF }; - assert!(mask <= 0xFF); - let byte = word & mask; - - result.push_str(&format!("{sep}{byte:02x}")); - - if remain <= 8 { - break; - } - word >>= 8; - i += 8; - sep = '-'; - } - sep = '|'; - } - result.push(']'); - - result - } -} - -pub struct BitIter<'a, T: Idx> { - /// A copy of the current word, but with any already-visited bits cleared. - /// (This lets us use `trailing_zeros()` to find the next set bit.) When it - /// is reduced to 0, we move onto the next word. - word: Word, - - /// The offset (measured in bits) of the current word. - offset: usize, - - /// Underlying iterator over the words. - iter: slice::Iter<'a, Word>, - - marker: PhantomData, -} - -impl<'a, T: Idx> BitIter<'a, T> { - #[inline] - fn new(words: &'a [Word]) -> BitIter<'a, T> { - // We initialize `word` and `offset` to degenerate values. On the first - // call to `next()` we will fall through to getting the first word from - // `iter`, which sets `word` to the first word (if there is one) and - // `offset` to 0. Doing it this way saves us from having to maintain - // additional state about whether we have started. - BitIter { - word: 0, - offset: usize::MAX - (WORD_BITS - 1), - iter: words.iter(), - marker: PhantomData, - } - } -} - -impl<'a, T: Idx> Iterator for BitIter<'a, T> { - type Item = T; - fn next(&mut self) -> Option { - loop { - if self.word != 0 { - // Get the position of the next set bit in the current word, - // then clear the bit. - let bit_pos = self.word.trailing_zeros() as usize; - self.word ^= 1 << bit_pos; - return Some(T::new(bit_pos + self.offset)); - } - - // Move onto the next word. `wrapping_add()` is needed to handle - // the degenerate initial value given to `offset` in `new()`. 
- self.word = *self.iter.next()?; - self.offset = self.offset.wrapping_add(WORD_BITS); - } - } -} +use bit_relations_inherent_impls; /// A fixed-size bitset type with a partially dense, partially sparse /// representation. The bitset is broken into chunks, and chunks that are all @@ -727,7 +368,7 @@ impl ChunkedBitSet { Some(Ones(chunk_domain_size)) => ChunkIter::Ones(0..*chunk_domain_size as usize), Some(Mixed(chunk_domain_size, _, words)) => { let num_words = num_words(*chunk_domain_size as usize); - ChunkIter::Mixed(BitIter::new(&words[0..num_words])) + ChunkIter::Mixed(BitIter::from_slice(&words[0..num_words])) } None => ChunkIter::Finished, } @@ -771,8 +412,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -847,8 +488,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -898,8 +539,8 @@ impl BitRelations> for ChunkedBitSet { ) { let self_chunk_words = Rc::make_mut(self_chunk_words); let has_changed = bitwise( - &mut self_chunk_words[0..num_words], - &other_chunk_words[0..num_words], + self_chunk_words[0..num_words].iter_mut(), + other_chunk_words[0..num_words].iter().copied(), op, ); debug_assert!(has_changed); @@ -920,48 +561,6 @@ impl BitRelations> for ChunkedBitSet { } } -impl BitRelations> for DenseBitSet { - fn union(&mut self, other: &ChunkedBitSet) -> bool { - sequential_update(|elem| self.insert(elem), other.iter()) - } - - fn subtract(&mut self, _other: &ChunkedBitSet) -> bool { - unimplemented!("implement if/when necessary"); - } - - fn intersect(&mut self, other: &ChunkedBitSet) -> bool { - assert_eq!(self.domain_size(), other.domain_size); - let mut changed = false; - for (i, chunk) in other.chunks.iter().enumerate() { - let mut words = &mut self.words[i * CHUNK_WORDS..]; - if words.len() > CHUNK_WORDS { - words = &mut words[..CHUNK_WORDS]; - } - match chunk { - Zeros(..) => { - for word in words { - if *word != 0 { - changed = true; - *word = 0; - } - } - } - Ones(..) => (), - Mixed(_, _, data) => { - for (i, word) in words.iter_mut().enumerate() { - let new_val = *word & data[i]; - if new_val != *word { - changed = true; - *word = new_val; - } - } - } - } - } - changed - } -} - impl Clone for ChunkedBitSet { fn clone(&self) -> Self { ChunkedBitSet { @@ -1080,15 +679,6 @@ enum ChunkIter<'a> { Finished, } -// Applies a function to mutate a bitset, and returns true if any -// of the applications return true -fn sequential_update( - mut self_update: impl FnMut(T) -> bool, - it: impl Iterator, -) -> bool { - it.fold(false, |changed, elem| self_update(elem) | changed) -} - impl fmt::Debug for ChunkedBitSet { fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { w.debug_list().entries(self.iter()).finish() @@ -1108,15 +698,16 @@ impl fmt::Debug for ChunkedBitSet { /// "changed" return value unreliable, because the change might have only /// affected excess bits. 
#[inline] -fn bitwise(out_vec: &mut [Word], in_vec: &[Word], op: Op) -> bool -where - Op: Fn(Word, Word) -> Word, -{ - assert_eq!(out_vec.len(), in_vec.len()); +fn bitwise<'a>( + out: impl ExactSizeIterator, + in_: impl ExactSizeIterator, + op: impl Fn(Word, Word) -> Word, +) -> bool { + assert_eq!(out.len(), in_.len()); let mut changed = 0; - for (out_elem, in_elem) in iter::zip(out_vec, in_vec) { + for (out_elem, in_elem) in iter::zip(out, in_) { let old_val = *out_elem; - let new_val = op(old_val, *in_elem); + let new_val = op(old_val, in_elem); *out_elem = new_val; // This is essentially equivalent to a != with changed being a bool, but // in practice this code gets auto-vectorized by the compiler for most @@ -1161,15 +752,6 @@ pub enum MixedBitSet { Large(ChunkedBitSet), } -impl MixedBitSet { - pub fn domain_size(&self) -> usize { - match self { - MixedBitSet::Small(set) => set.domain_size(), - MixedBitSet::Large(set) => set.domain_size(), - } - } -} - impl MixedBitSet { #[inline] pub fn new_empty(domain_size: usize) -> MixedBitSet { @@ -1204,10 +786,15 @@ impl MixedBitSet { } } - pub fn insert_all(&mut self) { + /// Insert `0..domain_size` in the set. + /// + /// We would like an insert all function that doesn't require the domain size, but the exact + /// domain size is not stored in the `Small` variant, so that is not possible. + #[inline] + pub fn insert_all(&mut self, domain_size: usize) { match self { - MixedBitSet::Small(set) => set.insert_all(), - MixedBitSet::Large(set) => set.insert_all(), + Self::Small(set) => set.insert_all(domain_size), + Self::Large(set) => set.insert_all(), } } @@ -1304,87 +891,6 @@ impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> { } } -/// A resizable bitset type with a dense representation. -/// -/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also -/// just be `usize`. -/// -/// All operations that involve an element will panic if the element is equal -/// to or greater than the domain size. -#[derive(Clone, Debug, PartialEq)] -pub struct GrowableBitSet { - bit_set: DenseBitSet, -} - -impl Default for GrowableBitSet { - fn default() -> Self { - GrowableBitSet::new_empty() - } -} - -impl GrowableBitSet { - /// Ensure that the set can hold at least `min_domain_size` elements. - pub fn ensure(&mut self, min_domain_size: usize) { - if self.bit_set.domain_size < min_domain_size { - self.bit_set.domain_size = min_domain_size; - } - - let min_num_words = num_words(min_domain_size); - if self.bit_set.words.len() < min_num_words { - self.bit_set.words.resize(min_num_words, 0) - } - } - - pub fn new_empty() -> GrowableBitSet { - GrowableBitSet { bit_set: DenseBitSet::new_empty(0) } - } - - pub fn with_capacity(capacity: usize) -> GrowableBitSet { - GrowableBitSet { bit_set: DenseBitSet::new_empty(capacity) } - } - - /// Returns `true` if the set has changed. - #[inline] - pub fn insert(&mut self, elem: T) -> bool { - self.ensure(elem.index() + 1); - self.bit_set.insert(elem) - } - - /// Returns `true` if the set has changed. 
- #[inline] - pub fn remove(&mut self, elem: T) -> bool { - self.ensure(elem.index() + 1); - self.bit_set.remove(elem) - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.bit_set.is_empty() - } - - #[inline] - pub fn contains(&self, elem: T) -> bool { - let (word_index, mask) = word_index_and_mask(elem); - self.bit_set.words.get(word_index).is_some_and(|word| (word & mask) != 0) - } - - #[inline] - pub fn iter(&self) -> BitIter<'_, T> { - self.bit_set.iter() - } - - #[inline] - pub fn len(&self) -> usize { - self.bit_set.count() - } -} - -impl From> for GrowableBitSet { - fn from(bit_set: DenseBitSet) -> Self { - Self { bit_set } - } -} - /// A fixed-size 2D bit matrix type with a dense representation. /// /// `R` and `C` are index types used to identify rows and columns respectively; @@ -1416,14 +922,17 @@ impl BitMatrix { } /// Creates a new matrix, with `row` used as the value for every row. - pub fn from_row_n(row: &DenseBitSet, num_rows: usize) -> BitMatrix { - let num_columns = row.domain_size(); + pub fn from_row_n( + row: &DenseBitSet, + num_rows: usize, + num_columns: usize, + ) -> BitMatrix { let words_per_row = num_words(num_columns); - assert_eq!(words_per_row, row.words.len()); + assert_eq!(words_per_row, row.words().len()); BitMatrix { num_rows, num_columns, - words: iter::repeat(&row.words).take(num_rows).flatten().cloned().collect(), + words: iter::repeat_with(|| row.words()).take(num_rows).flatten().collect(), marker: PhantomData, } } @@ -1516,9 +1025,9 @@ impl BitMatrix { /// returns `true` if anything changed. pub fn union_row_with(&mut self, with: &DenseBitSet, write: R) -> bool { assert!(write.index() < self.num_rows); - assert_eq!(with.domain_size(), self.num_columns); + assert!(with.capacity() >= self.num_columns); let (write_start, write_end) = self.range(write); - bitwise(&mut self.words[write_start..write_end], &with.words, |a, b| a | b) + bitwise(self.words[write_start..write_end].iter_mut(), with.words(), |a, b| a | b) } /// Sets every cell in `row` to true. @@ -1542,7 +1051,7 @@ impl BitMatrix { pub fn iter(&self, row: R) -> BitIter<'_, C> { assert!(row.index() < self.num_rows); let (start, end) = self.range(row); - BitIter::new(&self.words[start..end]) + BitIter::from_slice(&self.words[start..end]) } /// Returns the number of elements in `row`. @@ -1657,11 +1166,6 @@ impl SparseBitMatrix { } } - /// Insert all bits in the given row. - pub fn insert_all_into_row(&mut self, row: R) { - self.ensure_row(row).insert_all(); - } - pub fn rows(&self) -> impl Iterator { self.rows.indices() } @@ -1754,11 +1258,6 @@ fn clear_excess_bits_in_final_word(domain_size: usize, words: &mut [Word]) { } } -#[inline] -fn max_bit(word: Word) -> usize { - WORD_BITS - 1 - word.leading_zeros() as usize -} - /// Integral type used to represent the bit set. 
pub trait FiniteBitSetTy: BitAnd diff --git a/compiler/rustc_index/src/bit_set/dense_bit_set.rs b/compiler/rustc_index/src/bit_set/dense_bit_set.rs new file mode 100644 index 0000000000000..7ff6ef285b3ea --- /dev/null +++ b/compiler/rustc_index/src/bit_set/dense_bit_set.rs @@ -0,0 +1,1282 @@ +use std::alloc::{Layout, alloc, alloc_zeroed, dealloc, handle_alloc_error, realloc}; +use std::hash::{Hash, Hasher}; +use std::iter::FusedIterator; +use std::marker::PhantomData; +use std::mem::ManuallyDrop; +use std::ops::{Range, RangeInclusive}; +use std::ptr::NonNull; +use std::{fmt, iter, slice}; + +use itertools::Either; +#[cfg(feature = "nightly")] +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +use super::{ + BitRelations, CHUNK_WORDS, Chunk, ChunkedBitSet, WORD_BITS, Word, word_index_and_mask, +}; +use crate::Idx; + +/// A fixed-size bitset type with a dense representation, using only one [`Word`] on the stack. +/// +/// This bit set occupies only a single [`Word`] of stack space. It can represent a domain size +/// of up to `[WORD_BITS] - 1` directly inline. If the domain size exceeds this limit, it instead +/// becomes a pointer to a sequence of [`Word`]s on the heap. This makes it very efficient for +/// domain sizes smaller than `[WORD_BITS]`. +/// +/// Additionally, if the set does not fit in one [`Word`], there is a special inline +/// variant for the empty set. In this case, the domain size is stored inline along with a few +/// bits indicating that the set is empty. Allocation is deferred until needed, such as on +/// the first insert or remove operation. This avoids the need to wrap a lazily initialised bit set +/// in a [`OnceCell`](std::cell::OnceCell) or an [`Option`]—you can simply create an empty set and +/// populate it if needed. +/// +/// Note 1: Since this bitset is dense, if your domain is large and/or relatively homogeneous (e.g. +/// long runs of set or unset bits), it may be more efficient to use a +/// [`MixedBitSet`](crate::bit_set::MixedBitSet) or an +/// [`IntervalSet`](crate::interval::IntervalSet), which are better suited for sparse or highly +/// compressible domains. +/// +/// Note 2: Use [`GrowableBitSet`] if you require support for resizing after creation. +/// +/// `T` is an index type—typically a newtyped `usize` wrapper, but it may also simply be `usize`. +/// +/// Any operation involving an element may panic if the element is equal to or greater than the +/// domain size. Operations involving two bitsets may panic if their domain sizes differ. Panicking +/// is not garranteed though as we store the domain size rounded up to the next multiple of +/// [`WORD_BITS`]. +#[repr(C)] +pub union DenseBitSet { + /// The bit set fits in a single [`Word`] stored inline on the stack. + /// + /// The most significant bit is set to 1 to distinguish this from the other variants. You + /// must never change that "tag bit" after the bit set has been created. + /// + /// The remaining bits makes up the bit set. The exact domain size is not stored. + inline: Word, + + /// The bit set doesn't fit in a single word, but is empty and not yet allocated. + /// + /// The first (most significant) two bits are set to `[0, 1]` to distinguish this variant + /// from others. This tag is stored in [`Self::EMPTY_UNALLOCATED_TAG_BITS`]. The remaining bits + /// hold the domain size (capacity) **in words** of the set, which is needed if the set is + /// eventually allocated. 
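As an aside (not part of the patch): a standalone sketch of the tag-bit scheme the doc comment above describes for the new `DenseBitSet` union, assuming `Word` is `u64` as in `rustc_index`. The constants mirror `IS_INLINE_TAG_BIT` and `EMPTY_UNALLOCATED_TAG_BITS` but are redefined locally so the snippet compiles on its own.

```rust
type Word = u64;
const WORD_BITS: usize = Word::BITS as usize;

// MSB set => the whole set lives inline in this one word.
const IS_INLINE_TAG_BIT: Word = 1 << (WORD_BITS - 1);
// Top two bits `01` => empty and not yet allocated; the low bits hold the word count.
const EMPTY_UNALLOCATED_TAG_BITS: usize = 0b01 << (usize::BITS - 2);

fn main() {
    // Inline empty set: only the tag bit is set.
    let inline_empty: Word = IS_INLINE_TAG_BIT;
    assert_ne!(inline_empty & IS_INLINE_TAG_BIT, 0);

    // Empty, unallocated set that would need 4 words once allocated.
    let empty_unallocated: usize = EMPTY_UNALLOCATED_TAG_BITS | 4;
    assert_eq!(empty_unallocated >> (usize::BITS - 2), 0b01);
    assert_eq!(empty_unallocated & !EMPTY_UNALLOCATED_TAG_BITS, 4);

    // A heap pointer stored shifted right by two always has its top two bits clear.
    let shifted_ptr: usize = 0x7f00_beef_usize >> 2;
    assert_eq!(shifted_ptr >> (usize::BITS - 2), 0);
}
```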
+ /// + /// Note that because the capacity is stored in words, not in bits, there is plenty of room + /// for the two tag bits. + empty_unallocated: usize, + + /// The bit set is stored on the heap. + /// + /// The two most significant bits are set to zero if this field is active. + on_heap: ManuallyDrop, + + /// This variant will never be created. + marker: PhantomData, +} + +impl DenseBitSet { + /// The maximum domain size that could be stored inlined on the stack. + pub const INLINE_CAPACITY: usize = WORD_BITS - 1; + + /// A [`Word`] with the most significant bit set. That is the tag bit telling that the set is + /// inlined. + const IS_INLINE_TAG_BIT: Word = 0x1 << (WORD_BITS - 1); + + /// The tag for the `empty_unallocated` variant. The two most significant bits are + /// `[0, 1]`. + const EMPTY_UNALLOCATED_TAG_BITS: usize = 0b01 << (usize::BITS - 2); + + /// Create a new empty bit set with a given domain_size. + /// + /// If `domain_size` is <= [`Self::INLINE_CAPACITY`], then it is stored inline on the stack, + /// otherwise it is stored on the heap. + #[inline] + pub fn new_empty(domain_size: usize) -> Self { + if domain_size <= Self::INLINE_CAPACITY { + // The first bit is set to indicate the union variant. + Self { inline: Self::IS_INLINE_TAG_BIT } + } else { + let num_words = domain_size.div_ceil(WORD_BITS); + debug_assert!(num_words.leading_zeros() >= 2); + Self { empty_unallocated: Self::EMPTY_UNALLOCATED_TAG_BITS | num_words } + } + } + + /// Create a new filled bit set. + #[inline] + pub fn new_filled(domain_size: usize) -> Self { + if domain_size <= Self::INLINE_CAPACITY { + Self { + inline: Word::MAX.unbounded_shr((WORD_BITS - domain_size) as u32) + | Self::IS_INLINE_TAG_BIT, + } + } else { + let num_words = domain_size.div_ceil(WORD_BITS); + let mut on_heap = BitSetOnHeap::new_empty(num_words); + let words = on_heap.as_mut_slice(); + for word in words.iter_mut() { + *word = Word::MAX; + } + // Remove excessive bits on the last word. + // Trust me: this mask is correct. + let last_word_mask = Word::MAX.wrapping_shr(domain_size.wrapping_neg() as u32); + *words.last_mut().unwrap() &= last_word_mask; + Self { on_heap: ManuallyDrop::new(on_heap) } + } + } + + /// Check if `self` is inlined. + // If this function returns `true`, it is safe to assume `self.inline`. Else, it is safe to + // assume `self.empty_unallocated`, or `self.on_heap`. + #[inline(always)] + pub fn is_inline(&self) -> bool { + // We check if the first bit is set. If so, it is inlined, otherwise it is on the heap. + (unsafe { self.inline } & Self::IS_INLINE_TAG_BIT) != 0 + } + + /// Check if `self` has a too large domain to be stored inline, is empty, and is not yet + /// allocated. + // If this function returns `true`, it is safe to assume `self.empty_unallocated`. Else, it is + // safe to assume `self.inline`, or `self.on_heap`. + #[inline(always)] + pub const fn is_empty_unallocated(&self) -> bool { + const MASK: usize = usize::MAX << usize::BITS - 2; + (unsafe { self.empty_unallocated } & MASK) == Self::EMPTY_UNALLOCATED_TAG_BITS + } + + /// Check if `self` is `empty_unallocated` and if so return the number of words required to + /// store the expected capacity. + // If this function returns `true`, it is safe to assume `self.empty_unallocated`. Else, it is + // safe to assume `self.inline`, or `self.on_heap`. 
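Not part of the patch: a quick standalone check of the last-word mask used by `new_filled` above ("Trust me: this mask is correct"), assuming a 64-bit `Word`. Because `wrapping_shr` reduces the shift amount modulo the bit width, `domain_size.wrapping_neg()` behaves like `(-domain_size) mod 64`, which yields exactly the low `domain_size % 64` bits (or all bits when the size is a multiple of 64).

```rust
fn main() {
    const WORD_BITS: usize = u64::BITS as usize;
    for domain_size in [65usize, 100, 128, 190] {
        let mask = u64::MAX.wrapping_shr(domain_size.wrapping_neg() as u32);
        let rem = domain_size % WORD_BITS;
        let expected = if rem == 0 { u64::MAX } else { (1u64 << rem) - 1 };
        assert_eq!(mask, expected, "mask mismatch for domain_size = {domain_size}");
    }
}
```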
+ #[inline(always)] + pub const fn empty_unallocated_get_num_words(&self) -> Option { + if self.is_empty_unallocated() { + Some(unsafe { self.empty_unallocated } ^ Self::EMPTY_UNALLOCATED_TAG_BITS) + } else { + None + } + } + + /// Check if `self` is allocated on the heap and return a reference to it in that case. + fn on_heap(&self) -> Option<&BitSetOnHeap> { + let self_word = unsafe { self.inline }; + // Check if the two most significant bits are 0. + if self_word & Word::MAX >> 2 == self_word { Some(unsafe { &self.on_heap }) } else { None } + } + + /// Check if `self` is allocated on the heap and return a mutable reference to it in that case. + fn on_heap_mut(&mut self) -> Option<&mut ManuallyDrop> { + let self_word = unsafe { self.inline }; + // Check if the two most significant bits are 0. + if self_word & Word::MAX >> 2 == self_word { + Some(unsafe { &mut self.on_heap }) + } else { + None + } + } + + /// If `self` is `empty_unallocated`, allocate it, otherwise return `self.on_heap_mut()`. + fn on_heap_get_or_alloc(&mut self) -> &mut BitSetOnHeap { + if let Some(num_words) = self.empty_unallocated_get_num_words() { + *self = Self { on_heap: ManuallyDrop::new(BitSetOnHeap::new_empty(num_words)) }; + unsafe { &mut self.on_heap } + } else { + self.on_heap_mut().unwrap() + } + } + + /// Get the capacity of this set. This is >= the initial domain size. + #[inline(always)] + pub(super) fn capacity(&self) -> usize { + if self.is_inline() { + Self::INLINE_CAPACITY + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + num_words * WORD_BITS + } else { + self.on_heap().unwrap().capacity() + } + } + + /// Checks if the bit set is empty. + #[inline(always)] + pub fn is_empty(&self) -> bool { + if self.is_inline() { + let x = unsafe { self.inline }; + x == Self::IS_INLINE_TAG_BIT + } else if self.is_empty_unallocated() { + true + } else { + self.on_heap().unwrap().is_empty() + } + } + + /// Clear the set. + #[inline(always)] + pub fn clear(&mut self) { + if self.is_inline() { + self.inline = Self::IS_INLINE_TAG_BIT + } else if let Some(on_heap) = self.on_heap_mut() { + for word in on_heap.as_mut_slice() { + *word = 0x0; + } + } + } + + /// Get an iterator of all words making up the set. + pub(super) fn words(&self) -> impl ExactSizeIterator { + if self.is_inline() { + let word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + Either::Left(iter::once(word)) + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + Either::Right(Either::Left(iter::repeat_n(0, num_words))) + } else { + Either::Right(Either::Right(self.on_heap().unwrap().as_slice().iter().copied())) + } + } + + /// Checks if `self` is a (non-strict) superset of `other`. + /// + /// May panic if `self` and other have different sizes. + #[inline(always)] + pub fn superset(&self, other: &Self) -> bool { + // Function to check that a usize is a superset of another. 
+ let word_is_superset = |x: Word, other: Word| (!x & other) == 0; + + if self.is_inline() { + let x = unsafe { self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + word_is_superset(x, y) + } else if other.is_empty_unallocated() { + true + } else { + let other_on_heap = other.on_heap().unwrap(); + if self.is_empty_unallocated() { + other_on_heap.is_empty() + } else { + let on_heap = self.on_heap().unwrap(); + let self_slice = on_heap.as_slice(); + let other_slice = other_on_heap.as_slice(); + debug_assert_eq!( + self_slice.len(), + other_slice.len(), + "bit sets have different domain sizes" + ); + self_slice.iter().zip(other_slice).all(|(&x, &y)| (!x & y) == 0) + } + } + } + + /// Count the number of set bits in the set. + #[inline(always)] + pub fn count(&self) -> usize { + if self.is_inline() { + let x = unsafe { self.inline }; + x.count_ones() as usize - 1 + } else if self.is_empty_unallocated() { + 0 + } else { + self.on_heap().unwrap().as_slice().iter().map(|w| w.count_ones() as usize).sum() + } + } + + /// Returns an iterator over the indices for all elements in this set. + #[inline(always)] + pub fn iter_usizes(&self) -> BitIter<'_, usize> { + if self.is_inline() { + let x = unsafe { self.inline }; + // Remove the tag bit. + let without_tag_bit = x ^ Self::IS_INLINE_TAG_BIT; + BitIter::from_single_word(without_tag_bit) + } else if let Some(on_heap) = self.on_heap() { + BitIter::from_slice(on_heap.as_slice()) + } else { + debug_assert!(self.is_empty_unallocated()); + BitIter::from_single_word(0) + } + } + + /// Insert the elem with index `idx`. Returns `true` if the set has changed. + #[inline(always)] + fn insert_usize(&mut self, idx: usize) -> bool { + // Insert the `i`th bit in a word and return `true` if it changed. + let insert_bit = |word: &mut Word, bit_idx: u32| { + let mask = 0x01 << bit_idx; + let old = *word; + *word |= mask; + *word != old + }; + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + insert_bit(x, idx as u32) + } else { + let words = self.on_heap_get_or_alloc().as_mut_slice(); + + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = &mut words[word_idx]; + insert_bit(word, bit_idx) + } + } + + /// Insert `0..domain_size` in the set. + /// + /// We would like an insert all function that doesn't require the domain size, but the exact + /// domain size is not stored so that is not possible. + #[inline(always)] + pub fn insert_all(&mut self, domain_size: usize) { + if self.is_inline() { + debug_assert!(domain_size <= Self::INLINE_CAPACITY); + unsafe { + self.inline |= Word::MAX.unbounded_shr(WORD_BITS as u32 - domain_size as u32) + }; + } else { + let on_heap = self.on_heap_get_or_alloc(); + debug_assert!(on_heap.capacity() >= domain_size, "domain size too big"); + let words = on_heap.as_mut_slice(); + + let (end_word_index, end_mask) = word_index_and_mask(domain_size - 1); + + for word_index in 0..end_word_index { + words[word_index] = Word::MAX; + } + + words[end_word_index] |= end_mask | (end_mask - 1); + } + } + + /// Sets `self = self | !other` for all elements less than `domain_size`. 
+ #[inline(always)] + pub fn union_not(&mut self, other: &Self, domain_size: usize) { + if self.is_inline() { + assert!(other.is_inline()); + + let self_word = unsafe { &mut self.inline }; + let other_word = unsafe { other.inline }; + + debug_assert!(domain_size <= Self::INLINE_CAPACITY); + + *self_word |= !other_word & Word::MAX.unbounded_shr((WORD_BITS - domain_size) as u32); + } else if other.is_empty_unallocated() { + self.insert_all(domain_size); + } else { + let self_words = self.on_heap_get_or_alloc().as_mut_slice(); + let other_words = other.on_heap().unwrap().as_slice(); + + // Set all but the last word if domain_size is not divisible by `WORD_BITS`. + for (self_word, other_word) in + self_words.iter_mut().zip(other_words).take(domain_size / WORD_BITS) + { + *self_word |= !other_word; + } + + let remaining_bits = domain_size % WORD_BITS; + if remaining_bits > 0 { + let last_idx = domain_size / WORD_BITS; + self_words[last_idx] |= !other_words[last_idx] & !(Word::MAX << remaining_bits); + } + } + } + + /// Common function for union/intersection-like operations. + /// + /// This function takes two bit sets—one mutably, one immutably. Neither must be the + /// `empty_unallocated` variant. It asserts that they have the same `domain_size`, then applies a function to + /// each pair of words, effectively performing a zip-like operation. + /// It checks whether `self` has changed; if so, it returns `true`, otherwise `false`. + /// + /// ## Safety + /// + /// - Neither set must be `self.empty_unallocated`. + /// - If the sets are inlined, this will leave the tag bit set to 1. You must not modify it—doing so + /// results in undefined behaviour. This may be inconvenient for operations such as subtraction; + /// in such cases, use `binary_operation_safe` instead. + #[inline(always)] + unsafe fn binary_operation(&mut self, other: &Self, op: impl Fn(&mut Word, Word)) -> bool { + debug_assert!(!self.is_empty_unallocated()); + debug_assert!(!other.is_empty_unallocated()); + + // Apply `op` and return if the word changed. + let apply_and_check_change = |x: &mut Word, y: Word| -> bool { + let old = *x; + op(x, y); + *x != old + }; + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + apply_and_check_change(x, y) + } else { + let self_on_heap = unsafe { &mut self.on_heap }; + assert!(!other.is_inline(), "bit sets has different domain sizes"); + let other_on_heap = unsafe { &other.on_heap }; + let self_slice = self_on_heap.as_mut_slice(); + let other_slice = other_on_heap.as_slice(); + assert_eq!(self_slice.len(), other_slice.len(), "bit sets have different domain sizes"); + let mut has_changed = false; + for (x, y) in self_slice.iter_mut().zip(other_slice) { + has_changed |= apply_and_check_change(x, *y); + } + has_changed + } + } + + /// Similar to [`Self::binary_operation`], but restores the tag bit if it has changed. + /// + /// Note that the tag bit will still be set in the call to `op`, but there is no danger in + /// changing it as it will be restored afterwords. + /// + /// ## Safety + /// + /// Neither set must be `self.empty_unallocated`. 
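An illustrative sketch (not from the patch) of the invariant the SAFETY comments above rely on: OR and AND preserve a tag bit that is set in both operands, while `a & !b` clears it, which is why `subtract` has to go through `binary_operation_safe` and restore the tag afterwards.

```rust
fn main() {
    const TAG: u64 = 1 << 63; // stand-in for IS_INLINE_TAG_BIT
    let a = TAG | 0b1010;
    let b = TAG | 0b0110;
    assert_ne!((a | b) & TAG, 0); // union keeps the tag
    assert_ne!((a & b) & TAG, 0); // intersection keeps the tag
    assert_eq!((a & !b) & TAG, 0); // subtraction erases it, so it must be restored
}
```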
+ #[inline(always)] + unsafe fn binary_operation_safe(&mut self, other: &Self, op: impl Fn(&mut Word, Word)) -> bool { + debug_assert!(!self.is_empty_unallocated()); + debug_assert!(!other.is_empty_unallocated()); + + if self.is_inline() { + let x = unsafe { &mut self.inline }; + assert!(other.is_inline(), "bit sets has different domain sizes"); + let y = unsafe { other.inline }; + + let old = *x; + op(x, y); + *x |= Self::IS_INLINE_TAG_BIT; + old != *x + } else { + let self_on_heap = unsafe { &mut self.on_heap }; + assert!(!other.is_inline(), "bit sets has different domain sizes"); + let other_on_heap = unsafe { &other.on_heap }; + let self_slice = self_on_heap.as_mut_slice(); + let other_slice = other_on_heap.as_slice(); + assert_eq!(self_slice.len(), other_slice.len(), "bit sets have different domain sizes"); + let mut has_changed = false; + for (x, y) in self_slice.iter_mut().zip(other_slice) { + let old = *x; + op(x, *y); + has_changed |= old != *x; + } + has_changed + } + } + + super::bit_relations_inherent_impls! {} +} + +impl BitRelations> for DenseBitSet { + #[inline(always)] + fn union(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() { + debug_assert!(!other.is_inline()); + *self = other.clone(); + !self.is_empty() + } else if other.is_empty_unallocated() { + false + } else { + // SAFETY: The union operation does not remove any bit set to 1, so the tag bit is + // unaffected. + unsafe { self.binary_operation(other, |x, y| *x |= y) } + } + } + + #[inline(always)] + fn intersect(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() { + false + } else if other.is_empty_unallocated() { + debug_assert!(!self.is_inline()); + let was_empty = self.is_empty(); + self.clear(); + !was_empty + } else { + // SAFETY: Since the tag bit is set in both `self` and `other`, the intersection won't + // remove it. + unsafe { self.binary_operation(other, |x, y| *x &= y) } + } + } + + #[inline(always)] + fn subtract(&mut self, other: &Self) -> bool { + if self.is_empty_unallocated() || other.is_empty_unallocated() { + false + } else { + unsafe { self.binary_operation_safe(other, |x, y| *x &= !y) } + } + } +} + +impl DenseBitSet { + /// Checks if the bit set contains `elem`. + #[inline(always)] + pub fn contains(&self, elem: T) -> bool { + // Check if the `i`th bit is set in a word. + let contains_bit = |word: Word, bit_idx: u32| { + let mask = 0x01 << bit_idx; + (word & mask) != 0 + }; + + let idx = elem.index(); + if self.is_inline() { + let x = unsafe { self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + contains_bit(x, idx as u32) + } else if let Some(on_heap) = self.on_heap() { + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = on_heap.as_slice()[word_idx]; + contains_bit(word, bit_idx) + } else { + debug_assert!(self.is_empty_unallocated()); + false + } + } + + /// Insert `elem`. Returns `true` if the set has changed. + #[inline(always)] + pub fn insert(&mut self, elem: T) -> bool { + self.insert_usize(elem.index()) + } + + /// Remove `elem`. Returns `true` if the set has changed. + #[inline(always)] + pub fn remove(&mut self, elem: T) -> bool { + // Remove the `i`th bit in a word and return `true` if it changed. 
+ let remove_bit = |word: &mut Word, bit_idx: u32| { + let mask = !(0x01 << bit_idx); + let old = *word; + *word &= mask; + *word != old + }; + + let idx = elem.index(); + if self.is_inline() { + let x = unsafe { &mut self.inline }; + debug_assert!(idx < Self::INLINE_CAPACITY, "index too large: {idx}"); + remove_bit(x, idx as u32) + } else if let Some(on_heap) = self.on_heap_mut() { + let word_idx = idx / WORD_BITS; + let bit_idx = (idx % WORD_BITS) as u32; + let word = &mut on_heap.as_mut_slice()[word_idx]; + remove_bit(word, bit_idx) + } else { + debug_assert!(self.is_empty_unallocated()); + // Nothing to be removed. + false + } + } + + /// Returns an iterator over all elements in this set. + #[inline(always)] + pub fn iter(&self) -> BitIter<'_, T> { + if self.is_inline() { + let x = unsafe { self.inline }; + // Remove the tag bit. + let without_tag_bit = x ^ Self::IS_INLINE_TAG_BIT; + BitIter::from_single_word(without_tag_bit) + } else if let Some(on_heap) = self.on_heap() { + BitIter::from_slice(on_heap.as_slice()) + } else { + debug_assert!(self.is_empty_unallocated()); + BitIter::from_single_word(0) + } + } + + /// Returns `Some(elem)` if the set contains exactly one elemement otherwise returns `None`. + #[inline(always)] + pub fn only_one_elem(&self) -> Option { + if self.is_inline() { + let word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + if word.is_power_of_two() { Some(T::new(word.trailing_zeros() as usize)) } else { None } + } else if self.is_empty_unallocated() { + None + } else { + let words = self.on_heap().unwrap().as_slice(); + let mut found_elem = None; + for (i, &word) in words.iter().enumerate() { + if word > 0 { + if found_elem.is_some() { + return None; + } + if word.is_power_of_two() { + found_elem = + Some(T::new(i * WORD_BITS as usize + word.trailing_zeros() as usize)); + } else { + return None; + } + } + } + found_elem + } + } + + #[inline] + pub fn insert_range(&mut self, range: Range) { + if let Some(end) = range.end.index().checked_sub(1) { + self.insert_range_inclusive(RangeInclusive::new(range.start, Idx::new(end))); + } + } + + #[inline(always)] + pub fn insert_range_inclusive(&mut self, range: RangeInclusive) { + let start = range.start().index(); + let end = range.end().index(); + + if start > end { + return; + } + + if self.is_inline() { + debug_assert!(end < Self::INLINE_CAPACITY); + let mask = (1 << end) | ((1 << end) - (1 << start)); + unsafe { self.inline |= mask }; + } else { + let words = self.on_heap_get_or_alloc().as_mut_slice(); + + let (start_word_index, start_mask) = word_index_and_mask(start); + let (end_word_index, end_mask) = word_index_and_mask(end); + + // Set all words in between start and end (exclusively of both). + for word_index in (start_word_index + 1)..end_word_index { + words[word_index] = !0; + } + + if start_word_index != end_word_index { + // Start and end are in different words, so we handle each in turn. + // + // We set all leading bits. This includes the start_mask bit. + words[start_word_index] |= !(start_mask - 1); + // And all trailing bits (i.e. from 0..=end) in the end word, + // including the end. 
+ words[end_word_index] |= end_mask | (end_mask - 1); + } else { + words[start_word_index] |= end_mask | (end_mask - start_mask); + } + } + } + + #[inline(always)] + pub fn last_set_in(&self, range: RangeInclusive) -> Option { + let start = range.start().index(); + let end = range.end().index(); + + if start > end { + return None; + } + + if self.is_inline() { + debug_assert!(end < Self::INLINE_CAPACITY); + let mut word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + let end_bit = 1 << end; + // Set all bits mor significant than `end_bit` to zero. + word &= end_bit | (end_bit - 1); + if word != 0 { + let pos = max_bit(word); + if start <= pos { Some(T::new(pos)) } else { None } + } else { + None + } + } else if let Some(on_heap) = self.on_heap() { + let words = on_heap.as_slice(); + + let (start_word_index, _) = word_index_and_mask(start); + let (end_word_index, end_mask) = word_index_and_mask(end); + + let end_word = words[end_word_index] & (end_mask | (end_mask - 1)); + if end_word != 0 { + let pos = max_bit(end_word) + WORD_BITS * end_word_index; + if start <= pos { + return Some(T::new(pos)); + } + } + + // We exclude end_word_index from the range here, because we don't want + // to limit ourselves to *just* the last word: the bits set it in may be + // after `end`, so it may not work out. + if let Some(offset) = + words[start_word_index..end_word_index].iter().rposition(|&w| w != 0) + { + let word_idx = start_word_index + offset; + let start_word = words[word_idx]; + let pos = max_bit(start_word) + WORD_BITS * word_idx; + if start <= pos { Some(T::new(pos)) } else { None } + } else { + None + } + } else { + debug_assert!(self.is_empty_unallocated()); + None + } + } +} + +impl BitRelations> for DenseBitSet { + fn union(&mut self, other: &ChunkedBitSet) -> bool { + other.iter().fold(false, |changed, elem| self.insert(elem) || changed) + } + + fn subtract(&mut self, _other: &ChunkedBitSet) -> bool { + unimplemented!("implement if/when necessary"); + } + + fn intersect(&mut self, other: &ChunkedBitSet) -> bool { + if self.is_inline() { + assert!(other.domain_size <= Self::INLINE_CAPACITY); + if other.domain_size == 0 { + return false; + } + + let word = unsafe { &mut self.inline }; + let old_word = *word; + match &other.chunks[0] { + Chunk::Zeros(d) => { + debug_assert_eq!(usize::from(*d), other.domain_size); + let mask = Word::MAX << other.domain_size(); + *word &= mask; + } + Chunk::Ones(_) => (), + Chunk::Mixed(d, _, words) => { + debug_assert_eq!(usize::from(*d), other.domain_size); + *word &= words[0] | Self::IS_INLINE_TAG_BIT; + } + } + *word != old_word + } else if let Some(on_heap) = self.on_heap_mut() { + let all_words = on_heap.as_mut_slice(); + + let mut changed = false; + for (i, chunk) in other.chunks.iter().enumerate() { + let mut words = &mut all_words[i * CHUNK_WORDS..]; + if words.len() > CHUNK_WORDS { + words = &mut words[..CHUNK_WORDS]; + } + match chunk { + Chunk::Zeros(..) => { + for word in words { + if *word != 0 { + changed = true; + *word = 0; + } + } + } + Chunk::Ones(..) 
=> (), + Chunk::Mixed(_, _, data) => { + for (i, word) in words.iter_mut().enumerate() { + let new_val = *word & data[i]; + if new_val != *word { + changed = true; + *word = new_val; + } + } + } + } + } + changed + } else { + debug_assert!(self.is_empty_unallocated()); + false + } + } +} + +#[cfg(feature = "nightly")] +impl Encodable for DenseBitSet { + #[inline(never)] // FIXME: For profiling purposes + fn encode(&self, s: &mut S) { + // The encoding is as follows: + // + // The `inline` and `empty_unallocated` variants are encoded as a single `Word`. Here, we + // consider the `empty_unallocated` variant as the `inline` variant because + // `empty_unallocated: usize`, `inline: Word`, and `usize` is smaller than `Word`. + // + // The `on_heap` variant is encoded as follows: First, the number of `Word`s are encoded + // with a single `Word`. We assert that the two most significant bits of this number are 0 + // to distinguish it from the `inline` and `empty_unallocated` variants. Then all the words are + // encoded in sequence. + + if let Some(on_heap) = self.on_heap() { + let n_words: Word = on_heap.n_words(); + debug_assert_eq!( + n_words >> WORD_BITS - 2, + 0x0, + "the two most significant bits must be 0" + ); + n_words.encode(s); + debug_assert_eq!(n_words as usize, on_heap.as_slice().len()); + for word in on_heap.as_slice().iter() { + word.encode(s); + } + } else { + let word = unsafe { self.inline }; + debug_assert!(word >> WORD_BITS - 2 != 0, "the 2 most significant bits must not be 0"); + word.encode(s); + } + } +} + +#[cfg(feature = "nightly")] +impl Decodable for DenseBitSet { + #[inline(never)] // FIXME: For profiling purposes + fn decode(d: &mut D) -> Self { + // First we read one `Word` and check the variant. + let word = Word::decode(d); + if word >> WORD_BITS - 2 == 0x0 { + // If the two most significant bits are 0, then this is the `on_heap` variant and the + // number of words is encoded by `word`. + let n_words = word as usize; + assert!( + n_words > 0, + "DenseBitSet decoder error: At least one word must be stored with the `on_heap` variant." + ); + let mut on_heap = BitSetOnHeap::new_empty(n_words); + + let words = on_heap.as_mut_slice(); + // All `words` are now initialised to 0x0. + debug_assert_eq!(words.len(), n_words); + + // Decode the words one-by-one. + for word in words.iter_mut() { + *word = Word::decode(d); + } + + DenseBitSet { on_heap: ManuallyDrop::new(on_heap) } + } else { + // Both the `inline` and `empty_unallocated` variants are encoded by one `Word`. We can + // just assume the `inline` variant because the `empty_unallocated` variant is smaller + // and the union is `repr(C)`. + Self { inline: word } + } + } +} + +impl Clone for DenseBitSet { + #[inline(always)] + fn clone(&self) -> Self { + if self.is_inline() { + let inline = unsafe { self.inline }; + Self { inline } + } else if self.is_empty_unallocated() { + let empty_unallocated = unsafe { self.empty_unallocated }; + Self { empty_unallocated } + } else { + let old_on_heap = unsafe { &self.on_heap }; + let on_heap = old_on_heap.clone(); + Self { on_heap } + } + } +} + +impl Drop for DenseBitSet { + #[inline(always)] + fn drop(&mut self) { + // Deallocate if `self` is not inlined. + if let Some(on_heap) = self.on_heap_mut() { + unsafe { + ManuallyDrop::drop(on_heap); + } + } + } +} + +/// A pointer to a dense bit set stored on the heap. +/// +/// This struct is a `usize`, with its two most significant bits always set to 0. 
If the value is +/// shifted left by 2 bits, it yields a pointer to a sequence of words on the heap. The first word +/// in this sequence represents the length—it indicates how many words follow. These subsequent +/// words make up the actual bit set. +/// +/// For example, suppose the bit set should support a domain size of 240 bits. We first determine +/// how many words are needed to store 240 bits—that’s 4 words, assuming `[WORD_BITS] == 64`. +/// The pointer in this struct then points to a sequence of five words allocated on the heap. The +/// first word has the value 4 (the length), and the remaining four words comprise the bit set. +#[repr(transparent)] +struct BitSetOnHeap(usize); + +impl BitSetOnHeap { + fn new_empty(len: usize) -> Self { + debug_assert!(len >= 1); + + // The first word is used to store the total number of words. The rest of the words + // store the bits. + let num_words = len + 1; + + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let ptr = unsafe { alloc_zeroed(layout).cast::() }; + let Some(ptr) = NonNull::::new(ptr) else { + handle_alloc_error(layout); + }; + + // Store the length in the first word. + unsafe { ptr.write(len as Word) }; + + // Convert `ptr` to a `usize` and shift it two bits to the right. + BitSetOnHeap((ptr.as_ptr() as usize) >> 2) + } + + /// Get a slice with all bits in this bit set. + /// + /// Note that the number of bits in the set is rounded up to the next power of `Usize::BITS`. So + /// if the user requested a domain_size of 216 bits, a slice with 4 words will be returned on a + /// 64-bit machine. + #[inline] + fn as_slice(&self) -> &[Word] { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + // The slice starts at the second word. + unsafe { slice::from_raw_parts(ptr.add(1), len) } + } + + /// Get a mutable slice with all bits in this bit set. + /// + /// Note that the number of bits in the set is rounded up to the next power of `Usize::BITS`. So + /// if the user requested a domain_size of 216 bits, a slice with 4 words will be returned on a + /// 64-bit machine. + #[inline] + fn as_mut_slice(&mut self) -> &mut [Word] { + let ptr = (self.0 << 2) as *mut Word; + let len = unsafe { ptr.read() } as usize; + // The slice starts at the second word. + unsafe { slice::from_raw_parts_mut(ptr.add(1), len) } + } + + /// Check if the set is empty. + fn is_empty(&self) -> bool { + self.as_slice().iter().all(|&x| x == 0) + } + + /// Get the number of words. + #[allow(dead_code)] // FIXME + #[inline] + fn n_words(&self) -> Word { + let ptr = (self.0 << 2) as *const Word; + unsafe { ptr.read() } + } + + /// Get the capacity, that is the number of elements that can be stored in this set. + fn capacity(&self) -> usize { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + len * WORD_BITS + } + + /// Make sure the set can hold at least `min_domain_size` elements. Reallocate if necessary. + fn ensure_capacity(&mut self, min_domain_size: usize) { + let len = min_domain_size.div_ceil(WORD_BITS); + + let old_ptr = (self.0 << 2) as *const Word; + let old_len = unsafe { old_ptr.read() } as usize; + + if len <= old_len { + return; + } + + // The first word is used to store the total number of words. The rest of the words + // store the bits. 
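A standalone check (not part of the patch) of the layout arithmetic in the `BitSetOnHeap` example above: with 64-bit words, a 240-bit domain needs four data words, plus one leading word that stores the length.

```rust
fn main() {
    const WORD_BITS: usize = 64;
    let domain_size: usize = 240;
    let data_words = domain_size.div_ceil(WORD_BITS);
    assert_eq!(data_words, 4);
    let total_allocation_words = data_words + 1; // +1 for the leading length word
    assert_eq!(total_allocation_words, 5);
}
```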
+ let num_words = len + 1; + let old_num_words = old_len + 1; + + let new_layout = Layout::array::(num_words).expect("Bit set too large"); + let old_layout = Layout::array::(old_num_words).expect("Bit set too large"); + + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let ptr = + unsafe { realloc(old_ptr as *mut u8, old_layout, new_layout.size()).cast::() }; + let Some(ptr) = NonNull::::new(ptr) else { + handle_alloc_error(new_layout); + }; + + // Store the length in the first word. + unsafe { ptr.write(len as Word) }; + + // Set all the new words to 0. + for word_idx in old_num_words..num_words { + unsafe { ptr.add(word_idx).write(0x0) } + } + + // Convert `ptr` to a `usize` and shift it two bits to the right. + self.0 = (ptr.as_ptr() as usize) >> 2 + } +} + +impl Clone for BitSetOnHeap { + fn clone(&self) -> Self { + let ptr = (self.0 << 2) as *const Word; + let len = unsafe { ptr.read() } as usize; + let num_words = len + 1; + + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: `num_words` is always at least `1` so we never allocate zero size. + let new_ptr = unsafe { alloc(layout).cast::() }; + let Some(new_ptr) = NonNull::::new(new_ptr) else { + handle_alloc_error(layout); + }; + + unsafe { ptr.copy_to_nonoverlapping(new_ptr.as_ptr(), num_words) }; + + BitSetOnHeap((new_ptr.as_ptr() as usize) >> 2) + } +} + +impl Drop for BitSetOnHeap { + fn drop(&mut self) { + let ptr = (self.0 << 2) as *mut Word; + + // SAFETY: The first word stores the number of words for the bit set. We have to add 1 + // because the first word storing the length is allocated as well. + let num_words = unsafe { ptr.read() } as usize + 1; + let layout = Layout::array::(num_words).expect("Bit set too large"); + // SAFETY: We know that `on_heap` has been allocated with the same layout. See the + // `new` method for reference. + unsafe { dealloc(ptr.cast::(), layout) }; + } +} + +pub struct BitIter<'a, T: Idx> { + /// A copy of the current word, but with any already-visited bits cleared. + /// (This lets us use `trailing_zeros()` to find the next set bit.) When it + /// is reduced to 0, we move onto the next word. + word: Word, + + /// The offset (measured in bits) of the current word. + offset: usize, + + /// Underlying iterator over the words. + iter: slice::Iter<'a, Word>, + + marker: PhantomData, +} + +impl<'a, T: Idx> BitIter<'a, T> { + pub(super) fn from_slice(words: &'a [Word]) -> Self { + // We initialize `word` and `offset` to degenerate values. On the first + // call to `next()` we will fall through to getting the first word from + // `iter`, which sets `word` to the first word (if there is one) and + // `offset` to 0. Doing it this way saves us from having to maintain + // additional state about whether we have started. + Self { + word: 0, + offset: usize::MAX - (WORD_BITS - 1), + iter: words.iter(), + marker: PhantomData, + } + } + + #[inline(always)] + fn from_single_word(word: Word) -> Self { + Self { word, offset: 0, iter: [].iter(), marker: PhantomData } + } +} + +impl<'a, T: Idx> Iterator for BitIter<'a, T> { + type Item = T; + + #[inline(always)] + fn next(&mut self) -> Option { + loop { + if self.word != 0 { + // Get the position of the next set bit in the current word, + // then clear the bit. + let bit_pos = self.word.trailing_zeros() as usize; + self.word ^= 0x01 << bit_pos; + return Some(T::new(bit_pos + self.offset)); + } + + // Move onto the next word. 
`wrapping_add()` is needed to handle + // the degenerate initial value given to `offset` in `new()`. + self.word = *self.iter.next()?; + self.offset = self.offset.wrapping_add(WORD_BITS); + } + } +} + +impl<'a, T: Idx> FusedIterator for BitIter<'a, T> {} + +impl fmt::Debug for DenseBitSet { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + w.debug_list().entries(self.iter()).finish() + } +} + +impl PartialEq for DenseBitSet { + #[inline] + fn eq(&self, other: &Self) -> bool { + if self.is_inline() { + if other.is_inline() { + unsafe { self.inline == other.inline } + } else if other.is_empty_unallocated() { + self.is_empty() + } else { + let other_words = other.on_heap().unwrap().as_slice(); + let self_word = unsafe { self.inline } ^ Self::IS_INLINE_TAG_BIT; + other_words[0] == self_word && other_words[1..].iter().all(|&w| w == 0) + } + } else if self.is_empty_unallocated() { + other.is_empty() + } else { + let self_words = self.on_heap().unwrap().as_slice(); + if other.is_empty_unallocated() { + self_words.iter().all(|&w| w == 0) + } else if other.is_inline() { + let other_word = unsafe { other.inline } ^ Self::IS_INLINE_TAG_BIT; + self_words[0] == other_word && self_words[1..].iter().all(|&w| w == 0) + } else { + let mut self_words = self_words.iter(); + let mut other_words = other.on_heap().unwrap().as_slice().iter(); + loop { + match (self_words.next(), other_words.next()) { + (Some(w1), Some(w2)) if w1 == w2 => (), + (Some(_), Some(_)) => break false, + (Some(0), None) | (None, Some(0)) => (), + (Some(_), None) | (None, Some(_)) => break false, + (None, None) => break true, + } + } + } + } + } +} + +impl Eq for DenseBitSet {} + +impl Hash for DenseBitSet { + #[inline] + fn hash(&self, hasher: &mut H) { + if self.is_inline() { + let inline = unsafe { self.inline }; + inline.hash(hasher); + } else if let Some(num_words) = self.empty_unallocated_get_num_words() { + // Now we hash 0 for `num_words` times so that this hash should be equal to a cleared + // set with the `on_heap` variant. + for _ in 0..num_words { + let zero_word: Word = 0x0; + zero_word.hash(hasher); + } + } else { + let words = self.on_heap().unwrap().as_slice(); + for word in words { + word.hash(hasher); + } + } + } +} + +/// A resizable bitset type with a dense representation. +/// +/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also +/// just be `usize`. +/// +/// All operations that involve an element will panic if the element is equal +/// to or greater than the domain size. +#[derive(Clone, PartialEq)] +pub struct GrowableBitSet { + bit_set: DenseBitSet, +} + +impl Default for GrowableBitSet { + fn default() -> Self { + GrowableBitSet::new_empty() + } +} + +impl GrowableBitSet { + /// Ensure that the set can hold at least `min_domain_size` elements. + pub fn ensure(&mut self, min_domain_size: usize) { + if min_domain_size <= self.bit_set.capacity() { + return; + } + + if self.bit_set.is_inline() { + // The set must change from being inlined to allocate on the heap. + debug_assert!(min_domain_size > DenseBitSet::::INLINE_CAPACITY); + + let mut new_bit_set = DenseBitSet::new_empty(min_domain_size); + if !self.bit_set.is_empty() { + // SAFETY: We know that `self.is_inline()` is true. 
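The `trailing_zeros()` loop in `next()` above is the standard way to walk the set bits of a word. A standalone sketch of the same technique over a plain `&[u64]` (illustrative only, not the iterator type defined here):

/// Collect the indices of all set bits in `words`, lowest first.
fn set_bits(words: &[u64]) -> Vec<usize> {
    let mut out = Vec::new();
    for (word_idx, &w) in words.iter().enumerate() {
        let mut word = w;
        while word != 0 {
            let bit = word.trailing_zeros() as usize; // lowest remaining set bit
            word ^= 1u64 << bit;                      // clear it, as `next()` does
            out.push(word_idx * 64 + bit);
        }
    }
    out
}

fn main() {
    // Bits 0 and 5 in the first word, bit 3 in the second word (index 67).
    assert_eq!(set_bits(&[0b10_0001, 0b1000]), vec![0, 5, 67]);
}

Clearing the bit that was just reported keeps the loop's invariant that `word` only holds not-yet-visited bits.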
+ let word = unsafe { self.bit_set.inline } ^ DenseBitSet::::IS_INLINE_TAG_BIT; + new_bit_set.on_heap_get_or_alloc().as_mut_slice()[0] = word; + } + self.bit_set = new_bit_set; + } else if self.bit_set.is_empty_unallocated() { + self.bit_set = DenseBitSet::new_empty(min_domain_size); + } else { + self.bit_set.on_heap_mut().unwrap().ensure_capacity(min_domain_size); + } + } + + pub fn new_empty() -> GrowableBitSet { + GrowableBitSet { bit_set: DenseBitSet::new_empty(0) } + } + + pub fn with_capacity(capacity: usize) -> GrowableBitSet { + GrowableBitSet { bit_set: DenseBitSet::new_empty(capacity) } + } + + /// Insert the element with index `idx`. Returns `true` if the set has changed. + #[inline] + pub fn insert_usize(&mut self, idx: usize) -> bool { + self.ensure(idx + 1); + self.bit_set.insert_usize(idx) + } +} + +impl GrowableBitSet { + /// Insert `elem` into the set, resizing if necessary. Returns `true` if the set has changed. + #[inline] + pub fn insert(&mut self, elem: T) -> bool { + self.insert_usize(elem.index()) + } + + /// Returns `true` if the set has changed. + #[inline] + pub fn remove(&mut self, elem: T) -> bool { + self.ensure(elem.index() + 1); + self.bit_set.remove(elem) + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.bit_set.is_empty() + } + + #[inline] + pub fn contains(&self, elem: T) -> bool { + elem.index() < self.bit_set.capacity() && self.bit_set.contains(elem) + } + + #[inline] + pub fn iter(&self) -> BitIter<'_, T> { + self.bit_set.iter() + } + + #[inline] + pub fn len(&self) -> usize { + self.bit_set.count() + } +} + +impl From> for GrowableBitSet { + fn from(bit_set: DenseBitSet) -> Self { + Self { bit_set } + } +} + +impl From> for DenseBitSet { + fn from(bit_set: GrowableBitSet) -> Self { + bit_set.bit_set + } +} + +impl fmt::Debug for GrowableBitSet { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + self.bit_set.fmt(w) + } +} + +#[inline] +fn max_bit(word: Word) -> usize { + WORD_BITS - 1 - word.leading_zeros() as usize +} diff --git a/compiler/rustc_index/src/bit_set/tests.rs b/compiler/rustc_index/src/bit_set/tests.rs index 323a66ddc6f20..eea19cb11a101 100644 --- a/compiler/rustc_index/src/bit_set/tests.rs +++ b/compiler/rustc_index/src/bit_set/tests.rs @@ -1,9 +1,583 @@ +use std::collections::BTreeSet; +use std::hash::{BuildHasher, BuildHasherDefault, DefaultHasher}; +use std::hint::black_box; +use std::ops::{Range, RangeBounds, RangeInclusive}; + +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use test::Bencher; + use super::*; +use crate::IndexVec; extern crate test; -use std::hint::black_box; -use test::Bencher; +/// A very simple pseudo random generator using linear xorshift. +/// +/// [See Wikipedia](https://en.wikipedia.org/wiki/Xorshift). This has 64-bit state and a period +/// of `2^64 - 1`. +struct Rng(u64); + +impl Rng { + fn new(seed: u64) -> Self { + Rng(seed) + } + + fn next(&mut self) -> usize { + self.0 ^= self.0 << 7; + self.0 ^= self.0 >> 9; + self.0 as usize + } + + fn next_bool(&mut self) -> bool { + self.next() % 2 == 0 + } + + /// Sample a range, a subset of `0..=max`. + /// + /// The purpose of this method is to make edge cases such as `0..=max` more common. 
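Taken together, the methods above give `GrowableBitSet` the usual grow-on-insert behaviour. A hedged usage sketch, assuming `rustc_index` is available as a dependency (it is an internal compiler crate, so this is illustrative rather than something external code would normally build):

use rustc_index::bit_set::GrowableBitSet;

fn main() {
    let mut set: GrowableBitSet<usize> = GrowableBitSet::new_empty();
    assert!(set.is_empty());

    assert!(set.insert(3));   // returns true: the set changed
    assert!(!set.insert(3));  // already present, nothing changed

    // Inserting past the current capacity goes through `ensure()` and grows
    // the underlying `DenseBitSet` onto the heap.
    assert!(set.insert(1_000));
    assert!(set.contains(1_000));
    assert_eq!(set.len(), 2);
}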
+ fn sample_range(&mut self, max: usize) -> RangeInclusive { + let start = match self.next() % 3 { + 0 => 0, + 1 => max, + 2 => self.next() % (max + 1), + _ => unreachable!(), + }; + let end = match self.next() % 3 { + 0 => 0, + 1 => max, + 2 => self.next() % (max + 1), + _ => unreachable!(), + }; + RangeInclusive::new(start, end) + } +} + +#[derive(Default)] +struct EncoderLittleEndian { + bytes: Vec, +} + +impl Encoder for EncoderLittleEndian { + fn emit_usize(&mut self, v: usize) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u8(&mut self, v: u8) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u16(&mut self, v: u16) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u32(&mut self, v: u32) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u64(&mut self, v: u64) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_u128(&mut self, v: u128) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_isize(&mut self, v: isize) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i8(&mut self, v: i8) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i16(&mut self, v: i16) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i32(&mut self, v: i32) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i64(&mut self, v: i64) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_i128(&mut self, v: i128) { + self.bytes.extend(v.to_le_bytes()); + } + fn emit_raw_bytes(&mut self, v: &[u8]) { + self.bytes.extend(v); + } +} + +struct DecoderLittleEndian<'a> { + bytes: &'a [u8], + /// Remember the original `bytes.len()` so we can calculate how many bytes we've read. + original_len: usize, +} + +impl<'a> DecoderLittleEndian<'a> { + fn new(bytes: &'a [u8]) -> Self { + Self { bytes, original_len: bytes.len() } + } +} + +impl<'a> Decoder for DecoderLittleEndian<'a> { + fn read_usize(&mut self) -> usize { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + usize::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u128(&mut self) -> u128 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u128::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u64(&mut self) -> u64 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u64::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u32(&mut self) -> u32 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u32::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u16(&mut self) -> u16 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u16::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_u8(&mut self) -> u8 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + u8::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_isize(&mut self) -> isize { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + isize::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i128(&mut self) -> i128 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i128::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i64(&mut self) -> i64 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i64::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i32(&mut self) -> i32 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i32::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_i16(&mut self) -> 
i16 { + let (int_bytes, rest) = self.bytes.split_at(size_of::()); + self.bytes = rest; + i16::from_le_bytes(int_bytes.try_into().unwrap()) + } + fn read_raw_bytes(&mut self, len: usize) -> &[u8] { + let (bytes, rest) = self.bytes.split_at(len); + self.bytes = rest; + bytes + } + fn peek_byte(&self) -> u8 { + self.bytes[0] + } + fn position(&self) -> usize { + self.original_len - self.bytes.len() + } +} + +fn test_with_domain_size(domain_size: usize) { + const TEST_ITERATIONS: u32 = 512; + + let mut set_1 = DenseBitSet::::new_empty(domain_size); + let mut set_1_reference = IndexVec::::from_elem_n(false, domain_size); + let mut set_2 = DenseBitSet::::new_empty(domain_size); + let mut set_2_reference = IndexVec::::from_elem_n(false, domain_size); + + let hasher = BuildHasherDefault::::new(); + + let mut encoder = EncoderLittleEndian::default(); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + // Make a random operation. + match rng.next() % 100 { + 0..20 => { + // Insert in one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to insert into. + if rng.next_bool() { + assert_eq!(!set_1.contains(elem), set_1.insert(elem)); + set_1_reference[elem] = true; + } else { + assert_eq!(!set_2.contains(elem), set_2.insert(elem)); + set_2_reference[elem] = true; + } + } + 20..40 => { + // Insert a range in one of the sets. + if domain_size == 0 { + continue; + } + + let range = rng.sample_range(domain_size - 1); + // Choose set to insert into. + if rng.next_bool() { + set_1.insert_range_inclusive(range.clone()); + for i in range { + set_1_reference[i] = true; + } + } else { + set_2.insert_range_inclusive(range.clone()); + for i in range { + set_2_reference[i] = true; + } + } + } + 40..50 => { + // Test insert_all(). + if rng.next_bool() { + set_1.insert_all(domain_size); + for x in set_1_reference.iter_mut() { + *x = true; + } + } else { + set_2.insert_all(domain_size); + for x in set_2_reference.iter_mut() { + *x = true; + } + } + } + 50..70 => { + // Remove from one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to remove into. + if rng.next_bool() { + assert_eq!(set_1.contains(elem), set_1.remove(elem),); + set_1_reference[elem] = false; + } else { + assert_eq!(set_2.contains(elem), set_2.remove(elem),); + set_2_reference[elem] = false; + } + } + 70..76 => { + // Union + let old_set_1 = set_1.clone(); + let changed = set_1.union(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] |= val; + } + } + 76..82 => { + // Intersection + let old_set_1 = set_1.clone(); + let changed = set_1.intersect(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] &= val; + } + } + 82..88 => { + // Subtraction + let old_set_1 = set_1.clone(); + let changed = set_1.subtract(&set_2); + assert_eq!(changed, old_set_1 != set_1); + + // Adjust the reference sets. + for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] &= !val; + } + } + 88..94 => { + // Union_not + set_1.union_not(&set_2, domain_size); + + // Adjust the reference sets. 
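The loop above is a differential (model-based) test: every random operation is applied both to the set under test and to a trivially correct reference model, and the two are compared after each step. A minimal, self-contained sketch of the same pattern, using a `BTreeSet` as a stand-in for the structure under test and the same xorshift update as `Rng`:

use std::collections::BTreeSet;

fn main() {
    const DOMAIN: usize = 100;
    let mut set = BTreeSet::new();       // stand-in for the set under test
    let mut model = vec![false; DOMAIN]; // trivially correct reference

    let mut state: u64 = 42; // xorshift update, as in `Rng`
    let mut next = || {
        state ^= state << 7;
        state ^= state >> 9;
        state as usize
    };

    for _ in 0..1_000 {
        let elem = next() % DOMAIN;
        if next() % 2 == 0 {
            set.insert(elem);
            model[elem] = true;
        } else {
            set.remove(&elem);
            model[elem] = false;
        }
        // After every operation, the two representations must agree exactly.
        assert!((0..DOMAIN).all(|i| set.contains(&i) == model[i]));
    }
}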
+ for (x, val) in set_2_reference.iter_enumerated() { + set_1_reference[x] |= !val; + } + } + 94..97 => { + // Clear + if rng.next_bool() { + set_1.clear(); + for x in set_1_reference.iter_mut() { + *x = false; + } + } else { + set_2.clear(); + for x in set_2_reference.iter_mut() { + *x = false; + } + } + } + 97..100 => { + // Test new_filled(). + if rng.next_bool() { + set_1 = DenseBitSet::new_filled(domain_size); + for x in set_1_reference.iter_mut() { + *x = true; + } + } else { + set_2 = DenseBitSet::new_filled(domain_size); + for x in set_2_reference.iter_mut() { + *x = true; + } + } + } + _ => unreachable!(), + } + + // Check the contains function. + for i in 0..domain_size { + assert_eq!(set_1.contains(i), set_1_reference[i]); + assert_eq!(set_2.contains(i), set_2_reference[i]); + } + + // Check iter function. + assert!( + set_1.iter().eq(set_1_reference.iter_enumerated().filter(|&(_, &v)| v).map(|(x, _)| x)) + ); + assert!( + set_2.iter().eq(set_2_reference.iter_enumerated().filter(|&(_, &v)| v).map(|(x, _)| x)) + ); + + // Check the superset relation. + assert_eq!(set_1.superset(&set_2), set_2.iter().all(|x| set_1.contains(x))); + + // Check the `==` operator. + assert_eq!(set_1 == set_2, set_1_reference == set_2_reference); + + // Check the `hash()` function. + // If the `set_1` and `set_2` are equal, then their hashes must also be equal. + if set_1 == set_2 { + assert_eq!(hasher.hash_one(&set_1), hasher.hash_one(&set_2)); + } + + // Check the count function. + assert_eq!(set_1.count(), set_1_reference.iter().filter(|&&x| x).count()); + assert_eq!(set_2.count(), set_2_reference.iter().filter(|&&x| x).count()); + + // Check `only_one_elem()`. + if let Some(elem) = set_1.only_one_elem() { + assert_eq!(set_1.count(), 1); + assert_eq!(elem, set_1.iter().next().unwrap()); + } else { + assert_ne!(set_1.count(), 1); + } + + // Check `last_set_in()`. + if domain_size > 0 { + let range = rng.sample_range(domain_size - 1); + assert_eq!( + set_1.last_set_in(range.clone()), + range.clone().filter(|&i| set_1.contains(i)).last() + ); + assert_eq!( + set_2.last_set_in(range.clone()), + range.filter(|&i| set_2.contains(i)).last() + ); + } + + // Check `Encodable` and `Decodable` implementations. + if rng.next() as u32 % TEST_ITERATIONS < 128 { + set_1.encode(&mut encoder); + + let mut decoder = DecoderLittleEndian::new(&encoder.bytes); + let decoded = DenseBitSet::::decode(&mut decoder); + assert_eq!( + decoder.position(), + encoder.bytes.len(), + "All bytes must be read when decoding." + ); + + assert_eq!(set_1, decoded); + + encoder.bytes.clear(); + } + } +} + +fn test_relations_with_chunked_set(domain_size: usize) { + const TEST_ITERATIONS: u32 = 64; + + let mut dense_set = DenseBitSet::::new_empty(domain_size); + let mut chunked_set = ChunkedBitSet::new_empty(domain_size); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + // Make a random operation. + match rng.next() % 10 { + 0..3 => { + // Insert in one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to insert into. + if rng.next_bool() { + dense_set.insert(elem); + } else { + chunked_set.insert(elem); + } + } + 3..6 => { + // Remove from one of the sets. + if domain_size == 0 { + continue; + } + let elem = rng.next() % domain_size; + // Choose set to remove into. 
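The hash check above relies on the usual `Eq`/`Hash` contract: equal values must hash equally, while unequal values are still allowed to collide. A small self-contained sketch of that check, using the same `BuildHasherDefault<DefaultHasher>` hasher as the test:

use std::hash::{BuildHasher, BuildHasherDefault, DefaultHasher, Hash};

/// If two values compare equal, their hashes must be equal too; the converse
/// need not hold.
fn check_hash_eq_consistency<T: Hash + PartialEq>(a: &T, b: &T) {
    let hasher = BuildHasherDefault::<DefaultHasher>::default();
    if a == b {
        assert_eq!(hasher.hash_one(a), hasher.hash_one(b));
    }
}

fn main() {
    check_hash_eq_consistency(&vec![1u32, 2, 3], &vec![1u32, 2, 3]);
    check_hash_eq_consistency(&"foo", &"bar"); // unequal: nothing to assert
}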
+ if rng.next_bool() { + dense_set.remove(elem); + } else { + chunked_set.remove(elem); + } + } + 6 => { + // Clear + if rng.next_bool() { + dense_set.clear(); + } else { + chunked_set.clear(); + } + } + 7 => { + // Fill. + if rng.next_bool() { + dense_set.insert_all(domain_size); + } else { + chunked_set.insert_all(); + } + } + 8 => { + // Union + let old_dense_set = dense_set.clone(); + let changed = dense_set.union(&chunked_set); + assert_eq!(old_dense_set != dense_set, changed); + assert!(dense_set.superset(&old_dense_set)); + assert!(chunked_set.iter().all(|x| dense_set.contains(x))); + + // Check that all the added elements come from `chunked_set`. + let mut difference = dense_set.clone(); + difference.subtract(&old_dense_set); + assert!(difference.iter().all(|x| chunked_set.contains(x))); + } + 9 => { + // Intersection + let old_dense_set = dense_set.clone(); + let changed = dense_set.intersect(&chunked_set); + assert_eq!(old_dense_set != dense_set, changed); + assert!(old_dense_set.superset(&dense_set)); + assert!(dense_set.iter().all(|x| chunked_set.contains(x))); + + // Check that no of the removed elements comes from `chunked_set`. + let mut difference = old_dense_set; // Just renaming. + difference.subtract(&dense_set); + assert!(difference.iter().all(|x| !chunked_set.contains(x))); + } + _ => unreachable!(), + } + } +} + +#[test] +fn test_dense_bit_set() { + assert_eq!( + size_of::>(), + size_of::(), + "DenseBitSet should have the same size as a Word" + ); + + test_with_domain_size(0); + test_with_domain_size(1); + test_with_domain_size(63); + test_with_domain_size(64); + test_with_domain_size(65); + test_with_domain_size(127); + test_with_domain_size(128); + test_with_domain_size(129); + + test_relations_with_chunked_set(0); + test_relations_with_chunked_set(1); + test_relations_with_chunked_set(CHUNK_BITS - 1); + test_relations_with_chunked_set(CHUNK_BITS); + test_relations_with_chunked_set(CHUNK_BITS + 2); + test_relations_with_chunked_set(3 * CHUNK_BITS - 2); + test_relations_with_chunked_set(3 * CHUNK_BITS); + test_relations_with_chunked_set(3 * CHUNK_BITS + 1); +} + +#[test] +fn test_growable_bit_set() { + const TEST_ITERATIONS: u32 = 512; + const MAX_ELEMS: usize = 314; + + let mut set = GrowableBitSet::::new_empty(); + let mut reference_set = BTreeSet::::new(); + + let mut rng = Rng::new(42); + + for _ in 0..TEST_ITERATIONS { + match rng.next() % 100 { + 0..30 => { + // Insert an element in the `0..=(DenseBitSet::INLINE_CAPACITY + 2)` range. + let elem = rng.next() % (DenseBitSet::::INLINE_CAPACITY + 3); + set.insert(elem); + reference_set.insert(elem); + } + 30..50 => { + // Insert an element in the `0..MAX_ELEMS` range. + let elem = rng.next() % MAX_ELEMS; + set.insert(elem); + reference_set.insert(elem); + } + 50..70 => { + // Remove an existing element. + let len = set.len(); + if len == 0 { + continue; + } + let elem = set.iter().nth(rng.next() % len).unwrap(); + set.remove(elem); + reference_set.remove(&elem); + } + 70..90 => { + // Remove an arbitrary element in the `0..MAX_ELEMS` range. + let elem = rng.next() % MAX_ELEMS; + set.remove(elem); + reference_set.remove(&elem); + } + 90..100 => { + // Make sure the `with_capacity()` function works. + let capacity = rng.next() % MAX_ELEMS; + set = GrowableBitSet::with_capacity(capacity); + reference_set.clear(); + } + _ => unreachable!(), + } + + // Check the `is_empty()` function. + assert_eq!(set.is_empty(), reference_set.is_empty()); + + // Check the `iter` function. 
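The `size_of` assertion in `test_dense_bit_set` above pins down the point of the new representation: one tagged `usize`, with the index type living only in a `PhantomData`. A standalone sketch of why such a layout stays word-sized (`TaggedWordSet` is a made-up stand-in, not the real type):

use std::marker::PhantomData;
use std::mem::size_of;

/// A made-up stand-in: one `usize` of storage plus a zero-sized index marker.
struct TaggedWordSet<T> {
    _repr: usize,            // inline bits or a tagged pointer
    _marker: PhantomData<T>, // zero-sized, so it does not affect the layout
}

fn main() {
    assert_eq!(size_of::<TaggedWordSet<u32>>(), size_of::<usize>());
}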
+ assert!(set.iter().eq(reference_set.iter().copied())); + + // Check the contains function with a 20 % probability. + if rng.next() % 5 == 0 { + for x in 0..MAX_ELEMS { + assert_eq!(set.contains(x), reference_set.contains(&x)); + } + } + } +} #[test] fn test_new_filled() { @@ -50,11 +624,11 @@ fn bitset_clone_from() { let mut b = DenseBitSet::new_empty(2); b.clone_from(&a); - assert_eq!(b.domain_size(), 10); + assert!(b.capacity() >= 10); assert_eq!(b.iter().collect::>(), [4, 7, 9]); b.clone_from(&DenseBitSet::new_empty(40)); - assert_eq!(b.domain_size(), 40); + assert!(b.capacity() >= 40); assert_eq!(b.iter().collect::>(), []); } @@ -91,7 +665,7 @@ fn union_not() { b.insert(81); // Already in `a`. b.insert(90); - a.union_not(&b); + a.union_not(&b, 100); // After union-not, `a` should contain all values in the domain, except for // the ones that are in `b` and were _not_ already in `a`. @@ -600,10 +1174,7 @@ fn sparse_matrix_operations() { #[test] fn dense_insert_range() { #[track_caller] - fn check(domain: usize, range: R) - where - R: RangeBounds + Clone + IntoIterator + std::fmt::Debug, - { + fn check_range(domain: usize, range: Range) { let mut set = DenseBitSet::new_empty(domain); set.insert_range(range.clone()); for i in set.iter() { @@ -613,32 +1184,45 @@ fn dense_insert_range() { assert!(set.contains(i), "{} in {:?}, inserted {:?}", i, set, range); } } - check(300, 10..10); - check(300, WORD_BITS..WORD_BITS * 2); - check(300, WORD_BITS - 1..WORD_BITS * 2); - check(300, WORD_BITS - 1..WORD_BITS); - check(300, 10..100); - check(300, 10..30); - check(300, 0..5); - check(300, 0..250); - check(300, 200..250); - - check(300, 10..=10); - check(300, WORD_BITS..=WORD_BITS * 2); - check(300, WORD_BITS - 1..=WORD_BITS * 2); - check(300, WORD_BITS - 1..=WORD_BITS); - check(300, 10..=100); - check(300, 10..=30); - check(300, 0..=5); - check(300, 0..=250); - check(300, 200..=250); + + #[track_caller] + fn check_range_inclusive(domain: usize, range: RangeInclusive) { + let mut set = DenseBitSet::new_empty(domain); + set.insert_range_inclusive(range.clone()); + for i in set.iter() { + assert!(range.contains(&i)); + } + for i in range.clone() { + assert!(set.contains(i), "{} in {:?}, inserted {:?}", i, set, range); + } + } + + check_range(300, 10..10); + check_range(300, WORD_BITS..WORD_BITS * 2); + check_range(300, WORD_BITS - 1..WORD_BITS * 2); + check_range(300, WORD_BITS - 1..WORD_BITS); + check_range(300, 10..100); + check_range(300, 10..30); + check_range(300, 0..5); + check_range(300, 0..250); + check_range(300, 200..250); + + check_range_inclusive(300, 10..=10); + check_range_inclusive(300, WORD_BITS..=WORD_BITS * 2); + check_range_inclusive(300, WORD_BITS - 1..=WORD_BITS * 2); + check_range_inclusive(300, WORD_BITS - 1..=WORD_BITS); + check_range_inclusive(300, 10..=100); + check_range_inclusive(300, 10..=30); + check_range_inclusive(300, 0..=5); + check_range_inclusive(300, 0..=250); + check_range_inclusive(300, 200..=250); for i in 0..WORD_BITS * 2 { for j in i..WORD_BITS * 2 { - check(WORD_BITS * 2, i..j); - check(WORD_BITS * 2, i..=j); - check(300, i..j); - check(300, i..=j); + check_range(WORD_BITS * 2, i..j); + check_range_inclusive(WORD_BITS * 2, i..=j); + check_range(300, i..j); + check_range_inclusive(300, i..=j); } } } @@ -656,7 +1240,7 @@ fn dense_last_set_before() { } #[track_caller] - fn cmp(set: &DenseBitSet, needle: impl RangeBounds + Clone + std::fmt::Debug) { + fn cmp(set: &DenseBitSet, needle: RangeInclusive) { assert_eq!( set.last_set_in(needle.clone()), easy(set, 
needle.clone()), @@ -672,20 +1256,18 @@ fn dense_last_set_before() { set.insert(WORD_BITS - 1); cmp(&set, 0..=WORD_BITS - 1); cmp(&set, 0..=5); - cmp(&set, 10..100); + cmp(&set, 10..=99); set.insert(100); - cmp(&set, 100..110); - cmp(&set, 99..100); + cmp(&set, 100..=119); + cmp(&set, 99..=99); cmp(&set, 99..=100); for i in 0..=WORD_BITS * 2 { for j in i..=WORD_BITS * 2 { for k in 0..WORD_BITS * 2 { let mut set = DenseBitSet::new_empty(300); - cmp(&set, i..j); cmp(&set, i..=j); set.insert(k); - cmp(&set, i..j); cmp(&set, i..=j); } } diff --git a/compiler/rustc_middle/src/values.rs b/compiler/rustc_middle/src/values.rs index 4d70a70873267..46bcc25dc0710 100644 --- a/compiler/rustc_middle/src/values.rs +++ b/compiler/rustc_middle/src/values.rs @@ -376,12 +376,8 @@ fn find_item_ty_spans( }); if check_params && let Some(args) = path.segments.last().unwrap().args { let params_in_repr = tcx.params_in_repr(def_id); - // the domain size check is needed because the HIR may not be well-formed at this point - for (i, arg) in args.args.iter().enumerate().take(params_in_repr.domain_size()) - { - if let hir::GenericArg::Type(ty) = arg - && params_in_repr.contains(i as u32) - { + for arg in params_in_repr.iter().map_while(|i| args.args.get(i as usize)) { + if let hir::GenericArg::Type(ty) = arg { find_item_ty_spans( tcx, ty.as_unambig_ty(), diff --git a/compiler/rustc_mir_dataflow/src/framework/cursor.rs b/compiler/rustc_mir_dataflow/src/framework/cursor.rs index 3f6e7a0661921..d45509d9de758 100644 --- a/compiler/rustc_mir_dataflow/src/framework/cursor.rs +++ b/compiler/rustc_mir_dataflow/src/framework/cursor.rs @@ -127,7 +127,7 @@ where #[cfg(test)] pub(crate) fn allow_unreachable(&mut self) { #[cfg(debug_assertions)] - self.reachable_blocks.insert_all() + self.reachable_blocks.insert_all(self.body().basic_blocks.len()) } /// Returns the `Analysis` used to generate the underlying `Results`. 
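The `values.rs` hunk above replaces an explicit `domain_size()` bound with `map_while`, which walks the set's ascending indices and stops at the first one that has no corresponding HIR argument. A self-contained sketch of that pattern with plain slices (all names here are invented for illustration):

fn main() {
    let params_in_repr = [0usize, 2, 5, 9];    // stand-in for the bit set's iter()
    let args = ["a", "b", "c", "d", "e", "f"]; // only indices 0..6 exist

    let touched: Vec<&str> = params_in_repr
        .iter()
        .map_while(|&i| args.get(i).copied())
        .collect();

    // Index 5 is still in range, index 9 is not, so iteration stops there.
    assert_eq!(touched, ["a", "c", "f"]);
}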
diff --git a/compiler/rustc_mir_dataflow/src/framework/fmt.rs b/compiler/rustc_mir_dataflow/src/framework/fmt.rs index 38599cd094933..8a5d3c35f2f52 100644 --- a/compiler/rustc_mir_dataflow/src/framework/fmt.rs +++ b/compiler/rustc_mir_dataflow/src/framework/fmt.rs @@ -82,21 +82,12 @@ where } fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let size = self.domain_size(); - assert_eq!(size, old.domain_size()); - - let mut set_in_self = MixedBitSet::new_empty(size); - let mut cleared_in_self = MixedBitSet::new_empty(size); - - for i in (0..size).map(T::new) { - match (self.contains(i), old.contains(i)) { - (true, false) => set_in_self.insert(i), - (false, true) => cleared_in_self.insert(i), - _ => continue, - }; - } + let mut set_in_self = self.clone(); + set_in_self.subtract(old); + let mut cleared_in_self = old.clone(); + cleared_in_self.subtract(self); - fmt_diff(&set_in_self, &cleared_in_self, ctxt, f) + fmt_diff(&MixedBitSet::Small(set_in_self), &MixedBitSet::Small(cleared_in_self), ctxt, f) } } diff --git a/compiler/rustc_mir_dataflow/src/impls/initialized.rs b/compiler/rustc_mir_dataflow/src/impls/initialized.rs index 18165b0b9bd08..4f77deb8526d2 100644 --- a/compiler/rustc_mir_dataflow/src/impls/initialized.rs +++ b/compiler/rustc_mir_dataflow/src/impls/initialized.rs @@ -464,7 +464,7 @@ impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> { // sets state bits for Arg places fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) { // set all bits to 1 (uninit) before gathering counter-evidence - state.insert_all(); + state.insert_all(self.move_data().move_paths.len()); drop_flag_effects_for_function_entry(self.body, self.move_data, |path, s| { assert!(s == DropFlagState::Present); diff --git a/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs b/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs index e3aa8f5a62014..896b86156c8a2 100644 --- a/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs +++ b/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs @@ -87,7 +87,6 @@ impl<'a, 'tcx> Analysis<'tcx> for MaybeStorageDead<'a> { } fn initialize_start_block(&self, body: &Body<'tcx>, state: &mut Self::Domain) { - assert_eq!(body.local_decls.len(), self.always_live_locals.domain_size()); // Do not iterate on return place and args, as they are trivially always live. 
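The rewritten `fmt_diff_with` above computes the two halves of a diff purely with set subtraction: bits set in `self` but not in `old`, and bits set in `old` but not in `self`. The same computation on a single word, as a sketch:

fn main() {
    let old: u64 = 0b1011;
    let new: u64 = 0b1110;

    let set_in_new = new & !old;     // became 1: bit 2
    let cleared_in_new = old & !new; // became 0: bit 0

    assert_eq!(set_in_new, 0b0100);
    assert_eq!(cleared_in_new, 0b0001);
}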
for local in body.vars_and_temps_iter() { if !self.always_live_locals.contains(local) { diff --git a/compiler/rustc_mir_transform/src/copy_prop.rs b/compiler/rustc_mir_transform/src/copy_prop.rs index 27af5818982d0..42cac41d8cb6d 100644 --- a/compiler/rustc_mir_transform/src/copy_prop.rs +++ b/compiler/rustc_mir_transform/src/copy_prop.rs @@ -34,7 +34,7 @@ impl<'tcx> crate::MirPass<'tcx> for CopyProp { let fully_moved = fully_moved_locals(&ssa, body); debug!(?fully_moved); - let mut storage_to_remove = DenseBitSet::new_empty(fully_moved.domain_size()); + let mut storage_to_remove = DenseBitSet::new_empty(body.local_decls.len()); for (local, &head) in ssa.copy_classes().iter_enumerated() { if local != head { storage_to_remove.insert(head); diff --git a/compiler/rustc_mir_transform/src/coroutine.rs b/compiler/rustc_mir_transform/src/coroutine.rs index cddb2f8477858..1b58b7dedf739 100644 --- a/compiler/rustc_mir_transform/src/coroutine.rs +++ b/compiler/rustc_mir_transform/src/coroutine.rs @@ -211,6 +211,9 @@ struct TransformVisitor<'tcx> { old_yield_ty: Ty<'tcx>, old_ret_ty: Ty<'tcx>, + + /// The number of locals in the [`Body`]. + n_locals: usize, } impl<'tcx> TransformVisitor<'tcx> { @@ -440,7 +443,7 @@ impl<'tcx> MutVisitor<'tcx> for TransformVisitor<'tcx> { let storage_liveness: GrowableBitSet = self.storage_liveness[block].clone().unwrap().into(); - for i in 0..self.always_live_locals.domain_size() { + for i in 0..self.n_locals { let l = Local::new(i); let needs_storage_dead = storage_liveness.contains(l) && !self.remap.contains(l) @@ -845,8 +848,6 @@ fn compute_storage_conflicts<'mir, 'tcx>( analysis: &mut MaybeRequiresStorage<'mir, 'tcx>, results: &Results>, ) -> BitMatrix { - assert_eq!(body.local_decls.len(), saved_locals.domain_size()); - debug!("compute_storage_conflicts({:?})", body.span); debug!("always_live = {:?}", always_live_locals); @@ -859,7 +860,11 @@ fn compute_storage_conflicts<'mir, 'tcx>( let mut visitor = StorageConflictVisitor { body, saved_locals, - local_conflicts: BitMatrix::from_row_n(&ineligible_locals, body.local_decls.len()), + local_conflicts: BitMatrix::from_row_n( + &ineligible_locals, + body.local_decls.len(), + body.local_decls.len(), + ), eligible_storage_live: DenseBitSet::new_empty(body.local_decls.len()), }; @@ -1010,7 +1015,7 @@ fn compute_layout<'tcx>( // Create a map from local indices to coroutine struct indices. 
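A recurring shape in these hunks: because the set no longer carries a `domain_size()`, each caller now supplies the size it already knows (`body.local_decls.len()`, the new `n_locals` field, an explicit column count for `BitMatrix::from_row_n`). A small sketch of that ownership pattern; `LivenessInfo` is a hypothetical struct invented purely for illustration:

struct LivenessInfo {
    live: Vec<u64>,  // stand-in for the bit set's words
    n_locals: usize, // domain size, tracked by the owner instead of the set
}

impl LivenessInfo {
    fn new(n_locals: usize) -> Self {
        Self { live: vec![0; n_locals.div_ceil(64)], n_locals }
    }

    /// Iterate over the whole domain, something the set alone can no longer bound.
    fn all_locals(&self) -> impl Iterator<Item = usize> {
        0..self.n_locals
    }
}

fn main() {
    let info = LivenessInfo::new(130);
    assert_eq!(info.live.len(), 3);
    assert_eq!(info.all_locals().count(), 130);
}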
let mut variant_fields: IndexVec> = iter::repeat(IndexVec::new()).take(CoroutineArgs::RESERVED_VARIANTS).collect(); - let mut remap = IndexVec::from_elem_n(None, saved_locals.domain_size()); + let mut remap = IndexVec::from_elem_n(None, body.local_decls.len()); for (suspension_point_idx, live_locals) in live_locals_at_suspension_points.iter().enumerate() { let variant_index = VariantIdx::from(CoroutineArgs::RESERVED_VARIANTS + suspension_point_idx); @@ -1585,6 +1590,7 @@ impl<'tcx> crate::MirPass<'tcx> for StateTransform { discr_ty, old_ret_ty, old_yield_ty, + n_locals: body.local_decls.len(), }; transform.visit_body(body); diff --git a/compiler/rustc_mir_transform/src/coverage/counters.rs b/compiler/rustc_mir_transform/src/coverage/counters.rs index 5568d42ab8f3c..c226f9b89ceb8 100644 --- a/compiler/rustc_mir_transform/src/coverage/counters.rs +++ b/compiler/rustc_mir_transform/src/coverage/counters.rs @@ -81,8 +81,9 @@ pub(crate) fn transcribe_counters( old: &NodeCounters, bcb_needs_counter: &DenseBitSet, bcbs_seen: &DenseBitSet, + num_bcbs: usize, ) -> CoverageCounters { - let mut new = CoverageCounters::with_num_bcbs(bcb_needs_counter.domain_size()); + let mut new = CoverageCounters::with_num_bcbs(num_bcbs); for bcb in bcb_needs_counter.iter() { if !bcbs_seen.contains(bcb) { diff --git a/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs b/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs index 4c20722a04347..e31cec383cfe4 100644 --- a/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs +++ b/compiler/rustc_mir_transform/src/coverage/counters/balanced_flow.rs @@ -72,7 +72,7 @@ impl BalancedFlowGraph { // Next, find all nodes that are currently not reverse-reachable from // `sink_edge_nodes`, and add them to the set as well. dfs.complete_search(); - sink_edge_nodes.union_not(dfs.visited_set()); + sink_edge_nodes.union_not(dfs.visited_set(), graph.num_nodes()); // The sink node is 1 higher than the highest real node. let sink = G::Node::new(graph.num_nodes()); diff --git a/compiler/rustc_mir_transform/src/coverage/query.rs b/compiler/rustc_mir_transform/src/coverage/query.rs index ccf76dc710874..003082d396d1b 100644 --- a/compiler/rustc_mir_transform/src/coverage/query.rs +++ b/compiler/rustc_mir_transform/src/coverage/query.rs @@ -136,7 +136,12 @@ fn coverage_ids_info<'tcx>( priority_list[1..].sort_by_key(|&bcb| !bcbs_seen.contains(bcb)); let node_counters = make_node_counters(&fn_cov_info.node_flow_data, &priority_list); - let coverage_counters = transcribe_counters(&node_counters, &bcb_needs_counter, &bcbs_seen); + let coverage_counters = transcribe_counters( + &node_counters, + &bcb_needs_counter, + &bcbs_seen, + fn_cov_info.priority_list.len(), + ); let CoverageCounters { phys_counter_for_node, next_counter_id, node_counters, expressions, .. diff --git a/compiler/rustc_mir_transform/src/deduce_param_attrs.rs b/compiler/rustc_mir_transform/src/deduce_param_attrs.rs index a0db8bdb7ed88..b2f077d5206fc 100644 --- a/compiler/rustc_mir_transform/src/deduce_param_attrs.rs +++ b/compiler/rustc_mir_transform/src/deduce_param_attrs.rs @@ -19,19 +19,20 @@ struct DeduceReadOnly { /// 1). The bit is true if the argument may have been mutated or false if we know it hasn't /// been up to the point we're at. mutable_args: DenseBitSet, + arg_count: usize, } impl DeduceReadOnly { /// Returns a new DeduceReadOnly instance. 
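`union_not` now also takes the domain size explicitly, because negating the other set would otherwise turn on bits past the end of the domain in the final word. A standalone sketch of the per-word computation and the final-word masking, written against plain `u64` slices (not the patch's implementation):

fn union_not(a: &mut [u64], b: &[u64], domain_size: usize) {
    for (a_word, b_word) in a.iter_mut().zip(b) {
        *a_word |= !*b_word;
    }
    // Clear the excess bits in the final word so they stay outside the domain.
    let used = domain_size % 64;
    if used != 0 {
        if let Some(last) = a.last_mut() {
            *last &= (1u64 << used) - 1;
        }
    }
}

fn main() {
    let mut a = [0b0001u64];
    let b = [0b0110u64];
    union_not(&mut a, &b, 4); // domain is 4 bits: indices 0..4
    assert_eq!(a[0], 0b1001); // a | !b, restricted to the low 4 bits
}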
fn new(arg_count: usize) -> Self { - Self { mutable_args: DenseBitSet::new_empty(arg_count) } + Self { mutable_args: DenseBitSet::new_empty(arg_count), arg_count } } } impl<'tcx> Visitor<'tcx> for DeduceReadOnly { fn visit_place(&mut self, place: &Place<'tcx>, context: PlaceContext, _location: Location) { // We're only interested in arguments. - if place.local == RETURN_PLACE || place.local.index() > self.mutable_args.domain_size() { + if place.local == RETURN_PLACE || place.local.index() > self.arg_count { return; } @@ -86,7 +87,7 @@ impl<'tcx> Visitor<'tcx> for DeduceReadOnly { let local = place.local; if place.is_indirect() || local == RETURN_PLACE - || local.index() > self.mutable_args.domain_size() + || local.index() > self.arg_count { continue; } diff --git a/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs b/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs index 75f351f05c30e..0d4bfd2f78b8a 100644 --- a/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs +++ b/compiler/rustc_mir_transform/src/lint_tail_expr_drop_order.rs @@ -274,7 +274,7 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body< // We shall now exclude some local bindings for the following cases. { - let mut to_exclude = MixedBitSet::new_empty(all_locals_dropped.domain_size()); + let mut to_exclude = MixedBitSet::new_empty(move_data.move_paths.len()); // We will now do subtraction from the candidate dropped locals, because of the // following reasons. for path_idx in all_locals_dropped.iter() { diff --git a/compiler/rustc_mir_transform/src/single_use_consts.rs b/compiler/rustc_mir_transform/src/single_use_consts.rs index 02caa92ad3fc8..d88f8607f8484 100644 --- a/compiler/rustc_mir_transform/src/single_use_consts.rs +++ b/compiler/rustc_mir_transform/src/single_use_consts.rs @@ -33,7 +33,9 @@ impl<'tcx> crate::MirPass<'tcx> for SingleUseConsts { locals_in_debug_info: DenseBitSet::new_empty(body.local_decls.len()), }; - finder.ineligible_locals.insert_range(..=Local::from_usize(body.arg_count)); + finder + .ineligible_locals + .insert_range_inclusive(Local::from_usize(0)..=Local::from_usize(body.arg_count)); finder.visit_body(body); diff --git a/compiler/rustc_mir_transform/src/sroa.rs b/compiler/rustc_mir_transform/src/sroa.rs index 7c6ccc89c4f30..c390c993c5aed 100644 --- a/compiler/rustc_mir_transform/src/sroa.rs +++ b/compiler/rustc_mir_transform/src/sroa.rs @@ -103,7 +103,7 @@ fn escaping_locals<'tcx>( }; let mut set = DenseBitSet::new_empty(body.local_decls.len()); - set.insert_range(RETURN_PLACE..=Local::from_usize(body.arg_count)); + set.insert_range_inclusive(RETURN_PLACE..=Local::from_usize(body.arg_count)); for (local, decl) in body.local_decls().iter_enumerated() { if excluded.contains(local) || is_excluded_ty(decl.ty) { set.insert(local);
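The two `insert_range_inclusive` call sites above mark the return place (local 0) through the last argument in a single inclusive range. A trivial sketch of the same bookkeeping over plain `usize` indices and a `Vec<bool>` (purely illustrative):

use std::ops::RangeInclusive;

fn insert_range_inclusive(set: &mut Vec<bool>, range: RangeInclusive<usize>) {
    for i in range {
        set[i] = true;
    }
}

fn main() {
    let arg_count = 3;
    let mut ineligible = vec![false; 10];
    // Return place is local 0; arguments are locals 1..=arg_count.
    insert_range_inclusive(&mut ineligible, 0..=arg_count);
    assert_eq!(&ineligible[..5], &[true, true, true, true, false]);
}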