diff --git a/crates/bevy_ecs/src/entity/mod.rs b/crates/bevy_ecs/src/entity/mod.rs
index e0bbd568afd80..a632341f29979 100644
--- a/crates/bevy_ecs/src/entity/mod.rs
+++ b/crates/bevy_ecs/src/entity/mod.rs
@@ -952,4 +952,36 @@ mod tests {
         assert!(Entity::new(2, 2) > Entity::new(1, 2));
         assert!(Entity::new(2, 2) >= Entity::new(1, 2));
     }
+
+    // Feel free to change this test if needed, but it seemed like an important
+    // part of the best-case performance changes in PR#9903.
+    #[test]
+    fn entity_hash_keeps_similar_ids_together() {
+        use std::hash::BuildHasher;
+        let hash = bevy_utils::EntityHash;
+
+        let first_id = 0xC0FFEE << 8;
+        let first_hash = hash.hash_one(Entity::from_raw(first_id));
+
+        for i in 1..=255 {
+            let id = first_id + i;
+            let hash = hash.hash_one(Entity::from_raw(id));
+            assert_eq!(hash.wrapping_sub(first_hash) as u32, i);
+        }
+    }
+
+    #[test]
+    fn entity_hash_id_bitflip_affects_high_7_bits() {
+        use std::hash::BuildHasher;
+        let hash = bevy_utils::EntityHash;
+
+        let first_id = 0xC0FFEE;
+        let first_hash = hash.hash_one(Entity::from_raw(first_id)) >> 57;
+
+        for bit in 0..u32::BITS {
+            let id = first_id ^ (1 << bit);
+            let hash = hash.hash_one(Entity::from_raw(id)) >> 57;
+            assert_ne!(hash, first_hash);
+        }
+    }
 }
diff --git a/crates/bevy_utils/src/lib.rs b/crates/bevy_utils/src/lib.rs
index fc0ec1c125c73..71ac0c5745e23 100644
--- a/crates/bevy_utils/src/lib.rs
+++ b/crates/bevy_utils/src/lib.rs
@@ -266,17 +266,18 @@ impl BuildHasher for EntityHash {
 /// A very fast hash that is only designed to work on generational indices
 /// like `Entity`. It will panic if attempting to hash a type containing
 /// non-u64 fields.
+///
+/// This is heavily optimized for typical cases, where there are lots of runs
+/// of contiguous indices and almost no generation conflicts.
+///
+/// If you have an unusual case -- say all your indices are multiples of 256
+/// or most of the entities are dead generations -- then you might also want to
+/// try [`AHasher`] for a slower hash computation but fewer lookup conflicts.
 #[derive(Debug, Default)]
 pub struct EntityHasher {
     hash: u64,
 }
 
-// This value comes from rustc-hash (also known as FxHasher) which in turn got
-// it from Firefox. It is something like `u64::MAX / N` for an N that gives a
-// value close to π and works well for distributing bits for hashing when using
-// with a wrapping multiplication.
-const FRAC_U64MAX_PI: u64 = 0x517cc1b727220a95;
-
 impl Hasher for EntityHasher {
     fn write(&mut self, _bytes: &[u8]) {
         panic!("can only hash u64 using EntityHasher");
@@ -284,11 +285,41 @@ impl Hasher for EntityHasher {
 
     #[inline]
     fn write_u64(&mut self, i: u64) {
-        // Apparently hashbrown's hashmap uses the upper 7 bits for some SIMD
-        // optimisation that uses those bits for binning. This hash function
-        // was faster than i | (i << (64 - 7)) in the worst cases, and was
-        // faster than PassHasher for all cases tested.
-        self.hash = i | (i.wrapping_mul(FRAC_U64MAX_PI) << 32);
+        // We ignore the generation entirely. It's always functionally correct
+        // to omit things when hashing, so long as it's consistent, just a perf
+        // trade-off. This hasher is designed for "normal" cases, where nearly
+        // everything in the table is a live entity, meaning there are few
+        // generation conflicts. And thus it's overall faster to just ignore
+        // the generation during hashing, leaving it to `Entity::eq` to
+        // confirm the generation matches -- just like `Entity::eq` checks that
+        // the index is actually the right one, since there's always the chance
+        // of a conflict in the index despite a good hash function.
+        //
+        // This masking actually ends up with negative cost after optimization,
+        // since it saves needing to do the shift-and-or between the fields.
+        let index = i & 0xFFFF_FFFF;
+
+        // SwissTable (and thus `hashbrown`) cares about two things from the hash:
+        // - H1: low bits (masked by `2ⁿ-1`) to pick the slot in which to store the item
+        // - H2: high 7 bits are used to SIMD optimize hash collision probing
+        // For more see <https://abseil.io/about/design/swisstables>
+
+        // This hash function assumes that the entity ids are still well-distributed,
+        // so for H1 it leaves the entity id alone in the low bits so that id locality
+        // will also give memory locality for things spawned together.
+        // For H2, it takes advantage of the fact that while multiplication doesn't
+        // spread entropy to the low bits, it's incredibly good at spreading it
+        // upward, which is exactly where we need it the most.
+
+        // The high 32 bits of this are ⅟φ for Fibonacci hashing. That works
+        // particularly well for hashing for the same reason as described in
+        // <https://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/>
+        // It loses no information because it has a modular inverse.
+        // (Specifically, `0x144c_bc89_u32.wrapping_mul(0x9e37_79b9_u32) == 1`.)
+        //
+        // The low 32 bits are just 1, to leave the entity id there unchanged.
+        const UPPER_PHI: u64 = 0x9e37_79b9_0000_0001;
+        self.hash = index.wrapping_mul(UPPER_PHI);
     }
 
     #[inline]
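
The arithmetic claims in the comments above can be checked outside the patch. Below is a minimal standalone sketch (not part of the PR) that copies the `UPPER_PHI` constant and the index masking from `write_u64` and verifies the modular-inverse and low-bit-preservation properties; the printed H2 values are only for illustration.

```rust
fn main() {
    // The multiplier from the patch: ⅟φ (the Fibonacci hashing constant) in
    // the high 32 bits, 1 in the low 32 bits.
    const UPPER_PHI: u64 = 0x9e37_79b9_0000_0001;

    // The modular-inverse claim from the comments: multiplying by the high
    // word loses no information modulo 2^32.
    assert_eq!(0x144c_bc89_u32.wrapping_mul(0x9e37_79b9), 1);

    // Reproduce the hash computation: mask down to the index, then multiply.
    let hash = |index: u32| u64::from(index).wrapping_mul(UPPER_PHI);

    let base: u32 = 0xC0FF_EE00;
    for i in 0..4 {
        let h = hash(base + i);
        // H1: the low 32 bits of the hash are the index itself, so nearby
        // indices keep nearby slot values in the table.
        assert_eq!(h as u32, base + i);
        // H2: the top 7 bits, which hashbrown uses for SIMD probing.
        println!("index {:#010x} -> hash {:#018x} (h2 = {:#04x})", base + i, h, h >> 57);
    }
}
```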
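For readers wondering how the hasher is consumed, here is a hypothetical sketch of an `Entity`-keyed map that opts into `EntityHash` explicitly. It assumes only what the new tests already rely on, namely that `bevy_utils::EntityHash` is a unit struct implementing `BuildHasher`; the `Health` payload type is invented for illustration.

```rust
use std::collections::HashMap;

use bevy_ecs::entity::Entity;
use bevy_utils::EntityHash;

// Hypothetical payload type, just for the example.
struct Health(f32);

fn health_map() -> HashMap<Entity, Health, EntityHash> {
    // `EntityHash` implements `BuildHasher`, so it can back a std `HashMap`
    // (or a `hashbrown` one) directly.
    HashMap::with_hasher(EntityHash)
}
```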