diff --git a/Cargo.lock b/Cargo.lock
index c071a2e11d269..2271800fe327f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2683,6 +2683,7 @@ dependencies = [
 name = "rustc_data_structures"
 version = "0.0.0"
 dependencies = [
+ "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "ena 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "graphviz 0.0.0",
diff --git a/src/librustc_data_structures/Cargo.toml b/src/librustc_data_structures/Cargo.toml
index 63e44d82a28c3..0bf45bf5f6b62 100644
--- a/src/librustc_data_structures/Cargo.toml
+++ b/src/librustc_data_structures/Cargo.toml
@@ -10,6 +10,7 @@ path = "lib.rs"
 crate-type = ["dylib"]
 
 [dependencies]
+arrayref = "0.3.5"
 ena = "0.13"
 log = "0.4"
 jobserver_crate = { version = "0.1", package = "jobserver" }
diff --git a/src/librustc_data_structures/ahash.rs b/src/librustc_data_structures/ahash.rs
new file mode 100644
index 0000000000000..ba55a64c561be
--- /dev/null
+++ b/src/librustc_data_structures/ahash.rs
@@ -0,0 +1,271 @@
+pub use rustc_hash::{FxHasher, FxHashMap, FxHashSet};
+use crate::convert::Convert;
+use std::hash::{Hasher};
+use std::slice;
+use std::mem;
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::Ordering::Relaxed;
+use arrayref::*;
+
+const BUFFER_SIZE: usize = 1024;
+
+pub struct BufferedHasher {
+    cursor: usize,
+    aes: AHasher,
+    buffer: [u8; BUFFER_SIZE],
+}
+
+impl ::std::fmt::Debug for BufferedHasher {
+    fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+        write!(f, "{:?} [{}]", self.aes, self.cursor)
+    }
+}
+
+#[inline(never)]
+#[no_mangle]
+#[target_feature(enable = "aes")]
+unsafe fn ahash_test(input: &[u8]) -> u64 {
+    let mut a = AHasher::new_with_keys(67, 87);
+    a.write(input);
+    a.finish128().0
+}
+
+#[inline(never)]
+#[no_mangle]
+fn ahash_test_one(b: &mut BufferedHasher, input: u64) {
+    b.write_u64(input);
+}
+
+static AES_ENABLED: AtomicBool = AtomicBool::new(true);
+
+impl BufferedHasher {
+    #[inline(always)]
+    pub fn new() -> Self {
+        BufferedHasher {
+            aes: AHasher::new_with_keys(0, 0),
+            cursor: 0,
+            buffer: [0; BUFFER_SIZE], // zero-init: mem::uninitialized() is deprecated and UB-prone
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn flush(&mut self) {
+        self.aes.write(self.buffer.get_unchecked(0..self.cursor));
+    }
+
+    #[inline(never)]
+    #[cold]
+    unsafe fn flush_cold(&mut self) {
+        if likely!(AES_ENABLED.load(Relaxed)) {
+            self.flush()
+        } else {
+            panic!("no aes");
+        }
+    }
+
+    #[inline(always)]
+    fn short_write_gen<T>(&mut self, x: T) {
+        let bytes = unsafe {
+            slice::from_raw_parts(&x as *const T as *const u8, mem::size_of::<T>())
+        };
+        self.short_write(bytes);
+    }
+
+    #[inline(always)]
+    fn short_write(&mut self, data: &[u8]) {
+        let mut cursor = self.cursor;
+        let len = data.len();
+        if unlikely!(cursor + len > BUFFER_SIZE) {
+            unsafe { self.flush_cold() };
+            cursor = 0;
+        }
+        unsafe {
+            self.buffer.get_unchecked_mut(cursor..(cursor + len)).copy_from_slice(data);
+        }
+        self.cursor = cursor + len;
+    }
+
+    #[inline(never)]
+    #[target_feature(enable = "aes")]
+    unsafe fn finish128_aes(mut self) -> (u64, u64) {
+        self.flush();
+        self.aes.finish128()
+    }
+
+    #[inline]
+    pub fn finish128(self) -> (u64, u64) {
+        if likely!(AES_ENABLED.load(Relaxed)) {
+            unsafe {
+                self.finish128_aes()
+            }
+        } else {
+            panic!("no aes");
+        }
+    }
+}
+
+impl Hasher for BufferedHasher {
+    #[inline]
+    fn write_u8(&mut self, i: u8) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_u16(&mut self, i: u16) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_u32(&mut self, i: u32) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_u64(&mut self, i: u64) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_usize(&mut self, i: usize) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_i8(&mut self, i: i8) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_i16(&mut self, i: i16) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_i32(&mut self, i: i32) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_i64(&mut self, i: i64) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write_isize(&mut self, i: isize) {
+        self.short_write_gen(i);
+    }
+
+    #[inline]
+    fn write(&mut self, data: &[u8]) {
+        if likely!(data.len() < BUFFER_SIZE / 10) {
+            self.short_write(data);
+        } else {
+            unsafe {
+                self.aes.write(data);
+            }
+        }
+    }
+
+    fn finish(&self) -> u64 {
+        panic!("cannot provide valid 64 bit hashes")
+    }
+}
+
+/// Just a simple bit pattern.
+const PAD: u128 = 0xF0E1_D2C3_B4A5_9687_7869_5A4B_3C2D_1E0F;
+
+#[derive(Debug, Clone)]
+pub struct AHasher {
+    buffer: [u64; 2],
+}
+
+impl AHasher {
+    #[inline]
+    pub(crate) fn new_with_keys(key0: u64, key1: u64) -> AHasher {
+        AHasher { buffer: [key0, key1] }
+    }
+
+    #[inline]
+    #[target_feature(enable = "aes")]
+    unsafe fn write(&mut self, input: &[u8]) {
+        let mut data = input;
+        let length = data.len() as u64;
+        //This will be scrambled by the first AES round in any branch.
+        self.buffer[1] ^= length;
+        //A 'binary search' on sizes reduces the number of comparisons.
+        if data.len() >= 8 {
+            if data.len() > 16 {
+                if data.len() > 128 {
+                    let mut par_block: u128 = self.buffer.convert();
+                    while data.len() > 128 {
+                        let (b1, rest) = data.split_at(16);
+                        let b1: u128 = (*as_array!(b1, 16)).convert();
+                        par_block = aeshash(par_block, b1);
+                        data = rest;
+                        let (b2, rest) = data.split_at(16);
+                        let b2: u128 = (*as_array!(b2, 16)).convert();
+                        self.buffer = aeshash(self.buffer.convert(), b2).convert();
+                        data = rest;
+                    }
+                    self.buffer = aeshash(self.buffer.convert(), par_block).convert();
+                }
+                while data.len() > 32 {
+                    //len 33-128
+                    let (block, rest) = data.split_at(16);
+                    let block: u128 = (*as_array!(block, 16)).convert();
+                    self.buffer = aeshash(self.buffer.convert(),block).convert();
+                    data = rest;
+                }
+                //len 17-32
+                let block = (*array_ref!(data, 0, 16)).convert();
+                self.buffer = aeshash(self.buffer.convert(),block).convert();
+                let block = (*array_ref!(data, data.len()-16, 16)).convert();
+                self.buffer = aeshash(self.buffer.convert(),block).convert();
+            } else {
+                //len 8-16
+                let block: [u64; 2] = [(*array_ref!(data, 0, 8)).convert(),
+                    (*array_ref!(data, data.len()-8, 8)).convert()];
+                self.buffer = aeshash(self.buffer.convert(),block.convert()).convert();
+            }
+        } else {
+            if data.len() >= 2 {
+                if data.len() >= 4 {
+                    //len 4-7
+                    let block: [u32; 2] = [(*array_ref!(data, 0, 4)).convert(),
+                        (*array_ref!(data, data.len()-4, 4)).convert()];
+                    let block: [u64;2] = [block[1] as u64, block[0] as u64];
+                    self.buffer = aeshash(self.buffer.convert(),block.convert()).convert()
+                } else {
+                    //len 2-3
+                    let block: [u16; 2] = [(*array_ref!(data, 0, 2)).convert(),
+                        (*array_ref!(data, data.len()-2, 2)).convert()];
+                    let block: u32 = block.convert();
+                    self.buffer = aeshash(self.buffer.convert(), block as u128).convert();
+                }
+            } else {
+                if data.len() > 0 {
+                    //len 1
+                    self.buffer = aeshash(self.buffer.convert(), data[0] as u128).convert();
+                }
+            }
+        }
+    }
+    #[inline]
+    #[target_feature(enable = "aes")]
+    unsafe fn finish128(self) -> (u64, u64) {
+        let result: [u64; 2] = aeshash(aeshash(self.buffer.convert(), PAD), PAD).convert();
+        (result[0], result[1])
+    }
+}
+
+#[inline]
+#[target_feature(enable = "aes")]
+unsafe fn aeshash(value: u128, xor: u128) -> u128 {
+    use std::mem::transmute;
+    #[cfg(target_arch = "x86")]
+    use core::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::*;
+    let value = transmute(value);
+    transmute(_mm_aesdec_si128(value, transmute(xor)))
+}
diff --git a/src/librustc_data_structures/convert.rs b/src/librustc_data_structures/convert.rs
new file mode 100644
index 0000000000000..1b47b81629c45
--- /dev/null
+++ b/src/librustc_data_structures/convert.rs
@@ -0,0 +1,81 @@
+use std::mem::transmute;
+
+pub(crate) trait Convert<To> {
+    fn convert(self) -> To;
+    fn convert_ref(&self) -> &To;
+    fn convert_mut_ref(&mut self) -> &mut To;
+}
+macro_rules! convert {
+    ($from:ty, $to:ty) => {
+        impl Convert<$to> for $from {
+            #[inline(always)]
+            fn convert(self) -> $to {
+                unsafe { transmute(self) }
+            }
+            #[inline(always)]
+            fn convert_ref(&self) -> &$to {
+                unsafe { transmute(self) }
+            }
+            #[inline(always)]
+            fn convert_mut_ref(&mut self) -> &mut $to {
+                unsafe { transmute(self) }
+            }
+        }
+        impl Convert<$from> for $to {
+            #[inline(always)]
+            fn convert(self) -> $from {
+                unsafe { transmute(self) }
+            }
+            #[inline(always)]
+            fn convert_ref(&self) -> &$from {
+                unsafe { transmute(self) }
+            }
+            #[inline(always)]
+            fn convert_mut_ref(&mut self) -> &mut $from {
+                unsafe { transmute(self) }
+            }
+        }
+    };
+}
+convert!(u128, [u64; 2]);
+convert!(u128, [u32; 4]);
+convert!(u128, [u16; 8]);
+convert!(u128, [u8; 16]);
+convert!([u64; 2], [u32; 4]);
+convert!([u64; 2], [u16; 8]);
+convert!([u64; 2], [u8; 16]);
+convert!([u32; 4], [u16; 8]);
+convert!([u32; 4], [u8; 16]);
+convert!([u16; 8], [u8; 16]);
+convert!(u64, [u32; 2]);
+convert!(u64, [u16; 4]);
+convert!(u64, [u8; 8]);
+convert!([u32; 2], [u16; 4]);
+convert!([u32; 2], [u8; 8]);
+convert!(u32, [u16; 2]);
+convert!(u32, [u8; 4]);
+convert!([u16; 2], [u8; 4]);
+convert!(u16, [u8; 2]);
+
+convert!([f64; 2], [u8; 16]);
+convert!([f32; 4], [u8; 16]);
+convert!(f64, [u8; 8]);
+convert!([f32; 2], [u8; 8]);
+convert!(f32, [u8; 4]);
+
+
+
+macro_rules! as_array {
+    ($input:expr, $len:expr) => {{
+        {
+            #[inline]
+            fn as_array<T>(slice: &[T]) -> &[T; $len] {
+                assert_eq!(slice.len(), $len);
+                unsafe {
+                    &*(slice.as_ptr() as *const [_; $len])
+                }
+            }
+            as_array($input)
+        }
+    }}
+}
diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs
index a1d7ab8856daa..029cfb6c483f0 100644
--- a/src/librustc_data_structures/lib.rs
+++ b/src/librustc_data_structures/lib.rs
@@ -69,6 +69,9 @@ macro_rules! unlikely {
     }
 }
 
+#[macro_use]
+pub mod convert;
+pub mod ahash;
 pub mod macros;
 pub mod svh;
 pub mod base_n;
diff --git a/src/librustc_data_structures/stable_hasher.rs b/src/librustc_data_structures/stable_hasher.rs
index 19343a9250df3..cdb5ed84cb50b 100644
--- a/src/librustc_data_structures/stable_hasher.rs
+++ b/src/librustc_data_structures/stable_hasher.rs
@@ -1,7 +1,7 @@
 use std::hash::{Hash, Hasher, BuildHasher};
 use std::marker::PhantomData;
 use std::mem;
-use crate::sip128::SipHasher128;
+use crate::ahash::BufferedHasher;
 use crate::indexed_vec;
 use crate::bit_set;
@@ -13,8 +13,7 @@ use crate::bit_set;
 /// hashing and the architecture dependent `isize` and `usize` types are
 /// extended to 64 bits if needed.
 pub struct StableHasher<W> {
-    state: SipHasher128,
-    bytes_hashed: u64,
+    state: BufferedHasher,
     width: PhantomData<W>,
 }
 
@@ -29,10 +28,10 @@ pub trait StableHasherResult: Sized {
 }
 
 impl<W: StableHasherResult> StableHasher<W> {
+    #[inline(always)]
     pub fn new() -> Self {
         StableHasher {
-            state: SipHasher128::new_with_keys(0, 0),
-            bytes_hashed: 0,
+            state: BufferedHasher::new(),
             width: PhantomData,
         }
     }
@@ -60,11 +59,6 @@ impl<W: StableHasherResult> StableHasher<W> {
     pub fn finalize(self) -> (u64, u64) {
         self.state.finish128()
     }
-
-    #[inline]
-    pub fn bytes_hashed(&self) -> u64 {
-        self.bytes_hashed
-    }
 }
 
 impl<W> Hasher for StableHasher<W> {
@@ -75,37 +69,31 @@
     #[inline]
     fn write(&mut self, bytes: &[u8]) {
         self.state.write(bytes);
-        self.bytes_hashed += bytes.len() as u64;
     }
 
     #[inline]
     fn write_u8(&mut self, i: u8) {
         self.state.write_u8(i);
-        self.bytes_hashed += 1;
     }
 
     #[inline]
     fn write_u16(&mut self, i: u16) {
         self.state.write_u16(i.to_le());
-        self.bytes_hashed += 2;
     }
 
     #[inline]
     fn write_u32(&mut self, i: u32) {
         self.state.write_u32(i.to_le());
-        self.bytes_hashed += 4;
     }
 
     #[inline]
     fn write_u64(&mut self, i: u64) {
         self.state.write_u64(i.to_le());
-        self.bytes_hashed += 8;
     }
 
     #[inline]
     fn write_u128(&mut self, i: u128) {
         self.state.write_u128(i.to_le());
-        self.bytes_hashed += 16;
     }
 
     #[inline]
@@ -114,37 +102,31 @@
         // platforms. This is important for symbol hashes when cross compiling,
         // for example.
         self.state.write_u64((i as u64).to_le());
-        self.bytes_hashed += 8;
     }
 
     #[inline]
     fn write_i8(&mut self, i: i8) {
         self.state.write_i8(i);
-        self.bytes_hashed += 1;
     }
 
     #[inline]
     fn write_i16(&mut self, i: i16) {
         self.state.write_i16(i.to_le());
-        self.bytes_hashed += 2;
     }
 
     #[inline]
     fn write_i32(&mut self, i: i32) {
         self.state.write_i32(i.to_le());
-        self.bytes_hashed += 4;
     }
 
     #[inline]
     fn write_i64(&mut self, i: i64) {
         self.state.write_i64(i.to_le());
-        self.bytes_hashed += 8;
     }
 
     #[inline]
     fn write_i128(&mut self, i: i128) {
         self.state.write_i128(i.to_le());
-        self.bytes_hashed += 16;
     }
 
     #[inline]
@@ -153,7 +135,6 @@
         // platforms. This is important for symbol hashes when cross compiling,
         // for example.
         self.state.write_i64((i as i64).to_le());
-        self.bytes_hashed += 8;
     }
 }
 
diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs
index 30fe327cac4ff..1218ae09876fa 100644
--- a/src/tools/tidy/src/deps.rs
+++ b/src/tools/tidy/src/deps.rs
@@ -59,6 +59,7 @@ const WHITELIST: &[Crate<'_>] = &[
     Crate("adler32"),
     Crate("aho-corasick"),
     Crate("arrayvec"),
+    Crate("arrayref"),
     Crate("atty"),
     Crate("backtrace"),
     Crate("backtrace-sys"),