From 45d2ae4a9b849f6a60e4f4822e4baff963ca6039 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Thu, 11 Mar 2021 22:06:45 +0100 Subject: [PATCH 1/6] Move raw bytes handling to Encoder/Decoder. --- .../rustc_data_structures/src/fingerprint.rs | 4 +- .../src/persist/file_format.rs | 1 + compiler/rustc_metadata/src/rmeta/encoder.rs | 4 +- compiler/rustc_metadata/src/rmeta/table.rs | 3 +- compiler/rustc_serialize/src/opaque.rs | 67 ++++++++++--------- compiler/rustc_serialize/src/serialize.rs | 18 +++++ 6 files changed, 59 insertions(+), 38 deletions(-) diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs index 681b49e2ea97b..6b533f9f05790 100644 --- a/compiler/rustc_data_structures/src/fingerprint.rs +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -1,7 +1,7 @@ use crate::stable_hasher; use rustc_serialize::{ opaque::{self, EncodeResult, FileEncodeResult}, - Decodable, Encodable, + Decodable, Decoder, Encodable, Encoder, }; use std::hash::{Hash, Hasher}; use std::mem::{self, MaybeUninit}; @@ -158,7 +158,7 @@ impl FingerprintEncoder for E { impl FingerprintEncoder for opaque::Encoder { fn encode_fingerprint(&mut self, f: &Fingerprint) -> EncodeResult { let bytes: [u8; 16] = unsafe { mem::transmute([f.0.to_le(), f.1.to_le()]) }; - self.emit_raw_bytes(&bytes); + self.emit_raw_bytes(&bytes)?; Ok(()) } } diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs index 374a9eb41e5c7..b821ed6cff9f8 100644 --- a/compiler/rustc_incremental/src/persist/file_format.rs +++ b/compiler/rustc_incremental/src/persist/file_format.rs @@ -15,6 +15,7 @@ use std::io::{self, Read}; use std::path::Path; use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; +use rustc_serialize::Encoder; /// The first few bytes of files generated by incremental compilation. const FILE_MAGIC: &[u8] = b"RSIC"; diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 9336add96929c..ebe6784c99245 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -2073,10 +2073,10 @@ pub(super) fn encode_metadata(tcx: TyCtxt<'_>) -> EncodedMetadata { fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata { let mut encoder = opaque::Encoder::new(vec![]); - encoder.emit_raw_bytes(METADATA_HEADER); + encoder.emit_raw_bytes(METADATA_HEADER).unwrap(); // Will be filled with the root position after encoding everything. - encoder.emit_raw_bytes(&[0, 0, 0, 0]); + encoder.emit_raw_bytes(&[0, 0, 0, 0]).unwrap(); let source_map_files = tcx.sess.source_map().files(); let source_file_cache = (source_map_files[0].clone(), 0); diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 03bd4170ea990..62c0ce1584594 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -2,6 +2,7 @@ use crate::rmeta::*; use rustc_index::vec::Idx; use rustc_serialize::opaque::Encoder; +use rustc_serialize::Encoder as _; use std::convert::TryInto; use std::marker::PhantomData; use std::num::NonZeroUsize; @@ -172,7 +173,7 @@ where pub(crate) fn encode(&self, buf: &mut Encoder) -> Lazy> { let pos = buf.position(); - buf.emit_raw_bytes(&self.bytes); + buf.emit_raw_bytes(&self.bytes).unwrap(); Lazy::from_position_and_meta(NonZeroUsize::new(pos as usize).unwrap(), self.bytes.len()) } } diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 3e37fc87ce6f9..648f609783dda 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -1,5 +1,5 @@ use crate::leb128::{self, max_leb128_len}; -use crate::serialize; +use crate::serialize::{self, Decoder as _, Encoder as _}; use std::borrow::Cow; use std::fs::File; use std::io::{self, Write}; @@ -30,11 +30,6 @@ impl Encoder { pub fn position(&self) -> usize { self.data.len() } - - #[inline] - pub fn emit_raw_bytes(&mut self, s: &[u8]) { - self.data.extend_from_slice(s); - } } macro_rules! write_leb128 { @@ -154,7 +149,13 @@ impl serialize::Encoder for Encoder { #[inline] fn emit_str(&mut self, v: &str) -> EncodeResult { self.emit_usize(v.len())?; - self.emit_raw_bytes(v.as_bytes()); + self.emit_raw_bytes(v.as_bytes())?; + Ok(()) + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) -> EncodeResult { + self.data.extend_from_slice(s); Ok(()) } } @@ -208,11 +209,6 @@ impl FileEncoder { self.flushed + self.buffered } - #[inline] - pub fn emit_raw_bytes(&mut self, s: &[u8]) -> FileEncodeResult { - self.write_all(s) - } - pub fn flush(&mut self) -> FileEncodeResult { // This is basically a copy of `BufWriter::flush`. If `BufWriter` ever // offers a raw buffer access API, we can use it, and remove this. @@ -508,6 +504,11 @@ impl serialize::Encoder for FileEncoder { self.emit_usize(v.len())?; self.emit_raw_bytes(v.as_bytes()) } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) -> FileEncodeResult { + self.write_all(s) + } } // ----------------------------------------------------------------------------- @@ -539,26 +540,6 @@ impl<'a> Decoder<'a> { pub fn advance(&mut self, bytes: usize) { self.position += bytes; } - - #[inline] - pub fn read_raw_bytes(&mut self, s: &mut [MaybeUninit]) -> Result<(), String> { - let start = self.position; - let end = start + s.len(); - assert!(end <= self.data.len()); - - // SAFETY: Both `src` and `dst` point to at least `s.len()` elements: - // `src` points to at least `s.len()` elements by above assert, and - // `dst` points to `s.len()` elements by derivation from `s`. - unsafe { - let src = self.data.as_ptr().add(start); - let dst = s.as_mut_ptr() as *mut u8; - ptr::copy_nonoverlapping(src, dst, s.len()); - } - - self.position = end; - - Ok(()) - } } macro_rules! read_leb128 { @@ -677,6 +658,26 @@ impl<'a> serialize::Decoder for Decoder<'a> { fn error(&mut self, err: &str) -> Self::Error { err.to_string() } + + #[inline] + fn read_raw_bytes(&mut self, s: &mut [MaybeUninit]) -> Result<(), String> { + let start = self.position; + let end = start + s.len(); + assert!(end <= self.data.len()); + + // SAFETY: Both `src` and `dst` point to at least `s.len()` elements: + // `src` points to at least `s.len()` elements by above assert, and + // `dst` points to `s.len()` elements by derivation from `s`. + unsafe { + let src = self.data.as_ptr().add(start); + let dst = s.as_mut_ptr() as *mut u8; + ptr::copy_nonoverlapping(src, dst, s.len()); + } + + self.position = end; + + Ok(()) + } } // Specializations for contiguous byte sequences follow. The default implementations for slices @@ -689,7 +690,7 @@ impl<'a> serialize::Decoder for Decoder<'a> { impl serialize::Encodable for [u8] { fn encode(&self, e: &mut Encoder) -> EncodeResult { serialize::Encoder::emit_usize(e, self.len())?; - e.emit_raw_bytes(self); + e.emit_raw_bytes(self)?; Ok(()) } } diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs index 47aad5b88c622..5ef6863766c34 100644 --- a/compiler/rustc_serialize/src/serialize.rs +++ b/compiler/rustc_serialize/src/serialize.rs @@ -7,6 +7,7 @@ Core encoding and decoding interfaces. use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::marker::PhantomData; +use std::mem::MaybeUninit; use std::path; use std::rc::Rc; use std::sync::Arc; @@ -200,6 +201,14 @@ pub trait Encoder { { f(self) } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) -> Result<(), Self::Error> { + for &c in s.iter() { + self.emit_u8(c)?; + } + Ok(()) + } } pub trait Decoder { @@ -377,6 +386,15 @@ pub trait Decoder { // Failure fn error(&mut self, err: &str) -> Self::Error; + + #[inline] + fn read_raw_bytes(&mut self, s: &mut [MaybeUninit]) -> Result<(), Self::Error> { + for c in s.iter_mut() { + let h = self.read_u8()?; + unsafe { *c.as_mut_ptr() = h }; + } + Ok(()) + } } /// Trait for types that can be serialized From 96984fe5c89e8d9da79fff88c46a2087691050ad Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Thu, 11 Mar 2021 22:16:15 +0100 Subject: [PATCH 2/6] Remove FingerprintEncoder/Decoder. --- .../rustc_data_structures/src/fingerprint.rs | 66 +++---------------- compiler/rustc_metadata/src/rmeta/decoder.rs | 7 -- compiler/rustc_metadata/src/rmeta/encoder.rs | 7 -- .../src/ty/query/on_disk_cache.rs | 13 ---- 4 files changed, 10 insertions(+), 83 deletions(-) diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs index 6b533f9f05790..81b115fac0d0e 100644 --- a/compiler/rustc_data_structures/src/fingerprint.rs +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -1,8 +1,5 @@ use crate::stable_hasher; -use rustc_serialize::{ - opaque::{self, EncodeResult, FileEncodeResult}, - Decodable, Decoder, Encodable, Encoder, -}; +use rustc_serialize::{Decodable, Encodable}; use std::hash::{Hash, Hasher}; use std::mem::{self, MaybeUninit}; @@ -63,16 +60,6 @@ impl Fingerprint { pub fn to_hex(&self) -> String { format!("{:x}{:x}", self.0, self.1) } - - pub fn decode_opaque(decoder: &mut opaque::Decoder<'_>) -> Result { - let mut bytes: [MaybeUninit; 16] = MaybeUninit::uninit_array(); - - decoder.read_raw_bytes(&mut bytes)?; - - let [l, r]: [u64; 2] = unsafe { mem::transmute(bytes) }; - - Ok(Fingerprint(u64::from_le(l), u64::from_le(r))) - } } impl std::fmt::Display for Fingerprint { @@ -130,55 +117,22 @@ impl stable_hasher::StableHasherResult for Fingerprint { impl_stable_hash_via_hash!(Fingerprint); impl Encodable for Fingerprint { + #[inline] fn encode(&self, s: &mut E) -> Result<(), E::Error> { - s.encode_fingerprint(self) + let bytes: [u8; 16] = unsafe { mem::transmute([self.0.to_le(), self.1.to_le()]) }; + s.emit_raw_bytes(&bytes)?; + Ok(()) } } impl Decodable for Fingerprint { + #[inline] fn decode(d: &mut D) -> Result { - d.decode_fingerprint() - } -} - -pub trait FingerprintEncoder: rustc_serialize::Encoder { - fn encode_fingerprint(&mut self, f: &Fingerprint) -> Result<(), Self::Error>; -} - -pub trait FingerprintDecoder: rustc_serialize::Decoder { - fn decode_fingerprint(&mut self) -> Result; -} - -impl FingerprintEncoder for E { - default fn encode_fingerprint(&mut self, _: &Fingerprint) -> Result<(), E::Error> { - panic!("Cannot encode `Fingerprint` with `{}`", std::any::type_name::()); - } -} - -impl FingerprintEncoder for opaque::Encoder { - fn encode_fingerprint(&mut self, f: &Fingerprint) -> EncodeResult { - let bytes: [u8; 16] = unsafe { mem::transmute([f.0.to_le(), f.1.to_le()]) }; - self.emit_raw_bytes(&bytes)?; - Ok(()) - } -} - -impl FingerprintEncoder for opaque::FileEncoder { - fn encode_fingerprint(&mut self, f: &Fingerprint) -> FileEncodeResult { - let bytes: [u8; 16] = unsafe { mem::transmute([f.0.to_le(), f.1.to_le()]) }; - self.emit_raw_bytes(&bytes) - } -} - -impl FingerprintDecoder for D { - default fn decode_fingerprint(&mut self) -> Result { - panic!("Cannot decode `Fingerprint` with `{}`", std::any::type_name::()); - } -} + let mut bytes: [MaybeUninit; 16] = MaybeUninit::uninit_array(); + d.read_raw_bytes(&mut bytes)?; -impl FingerprintDecoder for opaque::Decoder<'_> { - fn decode_fingerprint(&mut self) -> Result { - Fingerprint::decode_opaque(self) + let [l, r]: [u64; 2] = unsafe { mem::transmute(bytes) }; + Ok(Fingerprint(u64::from_le(l), u64::from_le(r))) } } diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index e9b8388c1c915..e8891e471f9da 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -7,7 +7,6 @@ use crate::rmeta::*; use rustc_ast as ast; use rustc_attr as attr; use rustc_data_structures::captures::Captures; -use rustc_data_structures::fingerprint::{Fingerprint, FingerprintDecoder}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::{Lock, LockGuard, Lrc, OnceCell}; @@ -351,12 +350,6 @@ impl<'a, 'tcx> Decodable> for DefIndex { } } -impl<'a, 'tcx> FingerprintDecoder for DecodeContext<'a, 'tcx> { - fn decode_fingerprint(&mut self) -> Result { - Fingerprint::decode_opaque(&mut self.opaque) - } -} - impl<'a, 'tcx> Decodable> for SyntaxContext { fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Result { let cdata = decoder.cdata(); diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index ebe6784c99245..4c3d15a86ab06 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -1,7 +1,6 @@ use crate::rmeta::table::{FixedSizeEncoding, TableBuilder}; use crate::rmeta::*; -use rustc_data_structures::fingerprint::{Fingerprint, FingerprintEncoder}; use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; use rustc_data_structures::stable_hasher::StableHasher; use rustc_data_structures::sync::{join, par_iter, Lrc, ParallelIterator}; @@ -307,12 +306,6 @@ impl<'a, 'tcx> Encodable> for Span { } } -impl<'a, 'tcx> FingerprintEncoder for EncodeContext<'a, 'tcx> { - fn encode_fingerprint(&mut self, f: &Fingerprint) -> Result<(), Self::Error> { - self.opaque.encode_fingerprint(f) - } -} - impl<'a, 'tcx> TyEncoder<'tcx> for EncodeContext<'a, 'tcx> { const CLEAR_CROSS_CRATE: bool = true; diff --git a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs index d0cd8a48f99b3..877d9741eb02d 100644 --- a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs @@ -4,7 +4,6 @@ use crate::mir::{self, interpret}; use crate::ty::codec::{RefDecodable, TyDecoder, TyEncoder}; use crate::ty::context::TyCtxt; use crate::ty::{self, Ty}; -use rustc_data_structures::fingerprint::{Fingerprint, FingerprintDecoder, FingerprintEncoder}; use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet}; use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell}; use rustc_data_structures::thin_vec::ThinVec; @@ -913,12 +912,6 @@ impl<'a, 'tcx> Decodable> for DefId { } } -impl<'a, 'tcx> FingerprintDecoder for CacheDecoder<'a, 'tcx> { - fn decode_fingerprint(&mut self) -> Result { - Fingerprint::decode_opaque(&mut self.opaque) - } -} - impl<'a, 'tcx> Decodable> for &'tcx FxHashSet { fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Result { RefDecodable::decode(d) @@ -1011,12 +1004,6 @@ where } } -impl<'a, 'tcx, E: OpaqueEncoder> FingerprintEncoder for CacheEncoder<'a, 'tcx, E> { - fn encode_fingerprint(&mut self, f: &Fingerprint) -> Result<(), E::Error> { - self.encoder.encode_fingerprint(f) - } -} - impl<'a, 'tcx, E> Encodable> for SyntaxContext where E: 'a + OpaqueEncoder, From a92f932d9ca3bdf08602b9b79bd664f198cadd56 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Thu, 11 Mar 2021 22:58:49 +0100 Subject: [PATCH 3/6] Introduce FileDecoder. --- compiler/rustc_serialize/src/opaque.rs | 163 ++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 648f609783dda..695b444fb1f2b 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -2,7 +2,7 @@ use crate::leb128::{self, max_leb128_len}; use crate::serialize::{self, Decoder as _, Encoder as _}; use std::borrow::Cow; use std::fs::File; -use std::io::{self, Write}; +use std::io::{self, BufRead, BufReader, Read, Write}; use std::mem::MaybeUninit; use std::path::Path; use std::ptr; @@ -680,6 +680,151 @@ impl<'a> serialize::Decoder for Decoder<'a> { } } +pub struct FileDecoder { + pub file: BufReader, +} + +impl FileDecoder { + #[inline] + pub fn new(file: BufReader) -> Self { + FileDecoder { file } + } + + #[inline] + pub fn advance(&mut self, bytes: usize) { + self.file.consume(bytes) + } +} + +macro_rules! read_leb128 { + ($dec:expr, $fun:ident, $ty:ty) => {{ + let mut buf = $dec.file.buffer(); + if buf.len() < max_leb128_len!($ty) { + buf = $dec.file.fill_buf()?; + } + let (value, bytes_read): ($ty, usize) = leb128::$fun(&buf); + $dec.file.consume(bytes_read); + Ok(value) + }}; +} + +impl serialize::Decoder for FileDecoder { + type Error = io::Error; + + #[inline] + fn read_nil(&mut self) -> Result<(), Self::Error> { + Ok(()) + } + + #[inline] + fn read_u128(&mut self) -> Result { + read_leb128!(self, read_u128_leb128, u128) + } + + #[inline] + fn read_u64(&mut self) -> Result { + read_leb128!(self, read_u64_leb128, u64) + } + + #[inline] + fn read_u32(&mut self) -> Result { + read_leb128!(self, read_u32_leb128, u32) + } + + #[inline] + fn read_u16(&mut self) -> Result { + read_leb128!(self, read_u16_leb128, u16) + } + + #[inline] + fn read_u8(&mut self) -> Result { + let mut value = [0; 1]; + self.file.read_exact(&mut value)?; + let [value] = value; + Ok(value) + } + + #[inline] + fn read_usize(&mut self) -> Result { + read_leb128!(self, read_usize_leb128, usize) + } + + #[inline] + fn read_i128(&mut self) -> Result { + read_leb128!(self, read_i128_leb128, i128) + } + + #[inline] + fn read_i64(&mut self) -> Result { + read_leb128!(self, read_i64_leb128, i64) + } + + #[inline] + fn read_i32(&mut self) -> Result { + read_leb128!(self, read_i32_leb128, i32) + } + + #[inline] + fn read_i16(&mut self) -> Result { + read_leb128!(self, read_i16_leb128, i16) + } + + #[inline] + fn read_i8(&mut self) -> Result { + let as_u8 = self.read_u8()?; + unsafe { Ok(::std::mem::transmute(as_u8)) } + } + + #[inline] + fn read_isize(&mut self) -> Result { + read_leb128!(self, read_isize_leb128, isize) + } + + #[inline] + fn read_bool(&mut self) -> Result { + let value = self.read_u8()?; + Ok(value != 0) + } + + #[inline] + fn read_f64(&mut self) -> Result { + let bits = self.read_u64()?; + Ok(f64::from_bits(bits)) + } + + #[inline] + fn read_f32(&mut self) -> Result { + let bits = self.read_u32()?; + Ok(f32::from_bits(bits)) + } + + #[inline] + fn read_char(&mut self) -> Result { + let bits = self.read_u32()?; + Ok(std::char::from_u32(bits).unwrap()) + } + + #[inline] + fn read_str(&mut self) -> Result, Self::Error> { + let len = self.read_usize()?; + let mut buf = Vec::new(); + buf.resize(len, 0u8); + self.file.read_exact(&mut buf)?; + let s = String::from_utf8(buf).unwrap(); + Ok(Cow::Owned(s)) + } + + #[inline] + fn error(&mut self, err: &str) -> Self::Error { + io::Error::new(io::ErrorKind::Other, err) + } + + #[inline] + fn read_raw_bytes(&mut self, s: &mut [MaybeUninit]) -> Result<(), Self::Error> { + self.file.read_exact(unsafe { MaybeUninit::slice_assume_init_mut(s) }) + } +} + // Specializations for contiguous byte sequences follow. The default implementations for slices // encode and decode each element individually. This isn't necessary for `u8` slices when using // opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding. @@ -719,3 +864,19 @@ impl<'a> serialize::Decodable> for Vec { Ok(v) } } + +impl serialize::Decodable for Vec { + fn decode(d: &mut FileDecoder) -> Result { + let len = serialize::Decoder::read_usize(d)?; + + let mut v = Vec::with_capacity(len); + let buf = &mut v.spare_capacity_mut()[..len]; + d.read_raw_bytes(buf)?; + + unsafe { + v.set_len(len); + } + + Ok(v) + } +} From 61fc8e39aa67e816b3d5f67c60e7183a04b6c448 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Thu, 11 Mar 2021 23:28:31 +0100 Subject: [PATCH 4/6] Read incremental files on-demand. --- .../src/persist/file_format.rs | 11 +++++----- .../rustc_incremental/src/persist/load.rs | 22 ++++++++++++------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs index b821ed6cff9f8..0001f54c694de 100644 --- a/compiler/rustc_incremental/src/persist/file_format.rs +++ b/compiler/rustc_incremental/src/persist/file_format.rs @@ -54,14 +54,14 @@ pub fn read_file( report_incremental_info: bool, path: &Path, nightly_build: bool, -) -> io::Result, usize)>> { - let data = match fs::read(path) { - Ok(data) => data, +) -> io::Result>> { + let file = match fs::File::open(path) { + Ok(file) => file, Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None), Err(err) => return Err(err), }; - let mut file = io::Cursor::new(data); + let mut file = io::BufReader::new(file); // Check FILE_MAGIC { @@ -102,8 +102,7 @@ pub fn read_file( } } - let post_header_start_pos = file.position() as usize; - Ok(Some((file.into_inner(), post_header_start_pos))) + Ok(Some(file)) } fn report_format_mismatch(report_incremental_info: bool, file: &Path, message: &str) { diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs index 2b5649bb0594f..6e56febae833c 100644 --- a/compiler/rustc_incremental/src/persist/load.rs +++ b/compiler/rustc_incremental/src/persist/load.rs @@ -4,9 +4,11 @@ use rustc_data_structures::fx::FxHashMap; use rustc_hir::definitions::Definitions; use rustc_middle::dep_graph::{PreviousDepGraph, SerializedDepGraph, WorkProduct, WorkProductId}; use rustc_middle::ty::query::OnDiskCache; -use rustc_serialize::opaque::Decoder; +use rustc_serialize::opaque::FileDecoder; use rustc_serialize::Decodable as RustcDecodable; use rustc_session::Session; +use std::fs; +use std::io::{self, Read, Seek}; use std::path::Path; use super::data::*; @@ -49,9 +51,9 @@ fn load_data( report_incremental_info: bool, path: &Path, nightly_build: bool, -) -> LoadResult<(Vec, usize)> { +) -> LoadResult> { match file_format::read_file(report_incremental_info, path, nightly_build) { - Ok(Some(data_and_pos)) => LoadResult::Ok { data: data_and_pos }, + Ok(Some(file)) => LoadResult::Ok { data: file }, Ok(None) => { // The file either didn't exist or was produced by an incompatible // compiler version. Neither is an error. @@ -116,9 +118,9 @@ pub fn load_dep_graph(sess: &Session) -> DepGraphFuture { let work_products_path = work_products_path(sess); let load_result = load_data(report_incremental_info, &work_products_path, nightly_build); - if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result { + if let LoadResult::Ok { data: file } = load_result { // Decode the list of work_products - let mut work_product_decoder = Decoder::new(&work_products_data[..], start_pos); + let mut work_product_decoder = FileDecoder::new(file); let work_products: Vec = RustcDecodable::decode(&mut work_product_decoder).unwrap_or_else(|e| { let msg = format!( @@ -163,8 +165,8 @@ pub fn load_dep_graph(sess: &Session) -> DepGraphFuture { match load_data(report_incremental_info, &path, nightly_build) { LoadResult::DataOutOfDate => LoadResult::DataOutOfDate, LoadResult::Error { message } => LoadResult::Error { message }, - LoadResult::Ok { data: (bytes, start_pos) } => { - let mut decoder = Decoder::new(&bytes, start_pos); + LoadResult::Ok { data: file } => { + let mut decoder = FileDecoder::new(file); let prev_commandline_args_hash = u64::decode(&mut decoder) .expect("Error reading commandline arg hash from cached dep-graph"); @@ -211,7 +213,11 @@ pub fn load_query_result_cache<'a>( &query_cache_path(sess), sess.is_nightly_build(), ) { - LoadResult::Ok { data: (bytes, start_pos) } => { + LoadResult::Ok { data: mut file } => { + let start_pos = file.seek(io::SeekFrom::Current(0)).unwrap() as usize; + file.seek(io::SeekFrom::Start(0)).unwrap(); + let mut bytes = Vec::new(); + file.read_to_end(&mut bytes).unwrap(); Some(OnDiskCache::new(sess, bytes, start_pos, definitions)) } _ => Some(OnDiskCache::new_empty(sess.source_map())), From 3c25ddfcb1b7565e7328c31ce1179881c8cc4e1a Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 13 Mar 2021 00:05:10 +0100 Subject: [PATCH 5/6] Reimplement BufReader to retry harder. --- compiler/rustc_serialize/src/opaque.rs | 138 ++++++++++++++++++++++--- 1 file changed, 123 insertions(+), 15 deletions(-) diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 695b444fb1f2b..740c3dfac8dd5 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -1,8 +1,9 @@ use crate::leb128::{self, max_leb128_len}; use crate::serialize::{self, Decoder as _, Encoder as _}; use std::borrow::Cow; +use std::convert::TryInto; use std::fs::File; -use std::io::{self, BufRead, BufReader, Read, Write}; +use std::io::{self, BufReader, Read, Seek, SeekFrom, Write}; use std::mem::MaybeUninit; use std::path::Path; use std::ptr; @@ -681,29 +682,138 @@ impl<'a> serialize::Decoder for Decoder<'a> { } pub struct FileDecoder { - pub file: BufReader, + file: File, + buf: Box<[u8]>, + pos: usize, + cap: usize, } impl FileDecoder { #[inline] pub fn new(file: BufReader) -> Self { - FileDecoder { file } + const CAP: usize = 8 * 1024; + let mut buf = Vec::with_capacity(CAP); + buf.resize(CAP, 0u8); + let old_buf = file.buffer(); + let len = old_buf.len(); + buf[..len].copy_from_slice(old_buf); + let file = file.into_inner(); + FileDecoder { file, buf: buf.into(), pos: 0, cap: len } } #[inline] pub fn advance(&mut self, bytes: usize) { - self.file.consume(bytes) + self.pos += bytes; + debug_assert!(self.pos <= self.cap); + } + + #[inline] + pub fn read_all(self) -> Result<(Box<[u8]>, usize), io::Error> { + let mut file = self.file; + let start_pos = file.seek(SeekFrom::Current(0))?; + let start_pos = start_pos.try_into().unwrap(); + file.seek(SeekFrom::Start(0))?; + let mut bytes = Vec::new(); + file.read_to_end(&mut bytes)?; + Ok((bytes.into(), start_pos)) + } + + #[inline] + fn read_byte(&mut self) -> Result { + if self.pos < self.cap { + let c = self.buf[self.pos]; + self.pos += 1; + Ok(c) + } else { + let read = self.file.read(&mut self.buf)?; + self.pos = 0; + self.cap = read; + Ok(self.buf[0]) + } + } + + fn read_exact(&mut self, mut out: &mut [u8]) -> Result<(), io::Error> { + loop { + let len = out.len(); + if len == 0 { + return Ok(()); + } else if self.pos + len < self.cap { + out.copy_from_slice(&self.buf[self.pos..self.pos + len]); + self.pos += len; + return Ok(()); + } + + let available = self.cap - self.pos; + out[..available].copy_from_slice(&self.buf[self.pos..self.cap]); + self.pos += len; + + // Re-fill the buffer starting from zero. + let read = self.file.read(&mut self.buf)?; + self.pos = 0; + self.cap = read; + out = &mut out[available..]; + } + } + + /// Read the buffer until we encounter a byte with its top bit unset. + #[inline] + fn read_for_leb128(&mut self) -> Result<&[u8], io::Error> { + self.fill_for_leb128()?; + Ok(&self.buf[self.pos..self.cap]) + } + + /// Fill the buffer until we encounter a byte with its top bit unset. + /// Fast path. + #[inline] + fn fill_for_leb128(&mut self) -> Result<(), io::Error> { + let buf = &mut self.buf[..]; + let known = &buf[self.pos..self.cap]; + if std::intrinsics::likely(known.iter().any(|c| c & 0x80 == 0)) { + return Ok(()); + } + + self.fill_more_for_leb128() + } + + /// Fill the buffer until we encounter a byte with its top bit unset. + /// Slow path. + #[cold] + fn fill_more_for_leb128(&mut self) -> Result<(), io::Error> { + let buf = &mut self.buf[..]; + let max = leb128::max_leb128_len(); + if self.pos + max >= self.cap { + // The buffer should be large enough. + debug_assert!(self.pos > max); + let len = self.cap - self.pos; + let (start, end) = buf.split_at_mut(self.pos); + start[..len].copy_from_slice(&end[..len]); + self.pos = 0; + self.cap = len; + } + + // We've reached the end of our internal buffer then we need to fetch + // some more data from the file. + loop { + let read = self.file.read(&mut buf[self.cap..])?; + self.cap += read; + + if read == 0 { + return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "")); + } + + let known = &mut buf[self.pos..self.cap]; + if known.iter().any(|c| c & 0x80 == 0) { + return Ok(()); + } + } } } macro_rules! read_leb128 { ($dec:expr, $fun:ident, $ty:ty) => {{ - let mut buf = $dec.file.buffer(); - if buf.len() < max_leb128_len!($ty) { - buf = $dec.file.fill_buf()?; - } - let (value, bytes_read): ($ty, usize) = leb128::$fun(&buf); - $dec.file.consume(bytes_read); + let buf = $dec.read_for_leb128()?; + let (value, bytes_read): ($ty, usize) = leb128::$fun(buf); + $dec.advance(bytes_read); Ok(value) }}; } @@ -738,9 +848,7 @@ impl serialize::Decoder for FileDecoder { #[inline] fn read_u8(&mut self) -> Result { - let mut value = [0; 1]; - self.file.read_exact(&mut value)?; - let [value] = value; + let value = self.read_byte()?; Ok(value) } @@ -809,7 +917,7 @@ impl serialize::Decoder for FileDecoder { let len = self.read_usize()?; let mut buf = Vec::new(); buf.resize(len, 0u8); - self.file.read_exact(&mut buf)?; + self.read_exact(&mut buf)?; let s = String::from_utf8(buf).unwrap(); Ok(Cow::Owned(s)) } @@ -821,7 +929,7 @@ impl serialize::Decoder for FileDecoder { #[inline] fn read_raw_bytes(&mut self, s: &mut [MaybeUninit]) -> Result<(), Self::Error> { - self.file.read_exact(unsafe { MaybeUninit::slice_assume_init_mut(s) }) + self.read_exact(unsafe { MaybeUninit::slice_assume_init_mut(s) }) } } From 296ca4cb090ae21637cccabde50cf52cb0c0db45 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sun, 14 Mar 2021 00:33:47 +0100 Subject: [PATCH 6/6] Reuse buffer for read_to_end. --- .../src/persist/file_format.rs | 8 ++-- .../rustc_incremental/src/persist/load.rs | 17 +++------ compiler/rustc_serialize/src/opaque.rs | 37 ++++++++++--------- 3 files changed, 29 insertions(+), 33 deletions(-) diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs index 0001f54c694de..107a78783802b 100644 --- a/compiler/rustc_incremental/src/persist/file_format.rs +++ b/compiler/rustc_incremental/src/persist/file_format.rs @@ -11,10 +11,10 @@ use std::env; use std::fs; -use std::io::{self, Read}; +use std::io; use std::path::Path; -use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; +use rustc_serialize::opaque::{FileDecoder, FileEncodeResult, FileEncoder}; use rustc_serialize::Encoder; /// The first few bytes of files generated by incremental compilation. @@ -54,14 +54,14 @@ pub fn read_file( report_incremental_info: bool, path: &Path, nightly_build: bool, -) -> io::Result>> { +) -> io::Result> { let file = match fs::File::open(path) { Ok(file) => file, Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None), Err(err) => return Err(err), }; - let mut file = io::BufReader::new(file); + let mut file = FileDecoder::new(file); // Check FILE_MAGIC { diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs index 6e56febae833c..652ef3de9b6a2 100644 --- a/compiler/rustc_incremental/src/persist/load.rs +++ b/compiler/rustc_incremental/src/persist/load.rs @@ -7,8 +7,6 @@ use rustc_middle::ty::query::OnDiskCache; use rustc_serialize::opaque::FileDecoder; use rustc_serialize::Decodable as RustcDecodable; use rustc_session::Session; -use std::fs; -use std::io::{self, Read, Seek}; use std::path::Path; use super::data::*; @@ -51,7 +49,7 @@ fn load_data( report_incremental_info: bool, path: &Path, nightly_build: bool, -) -> LoadResult> { +) -> LoadResult { match file_format::read_file(report_incremental_info, path, nightly_build) { Ok(Some(file)) => LoadResult::Ok { data: file }, Ok(None) => { @@ -118,9 +116,8 @@ pub fn load_dep_graph(sess: &Session) -> DepGraphFuture { let work_products_path = work_products_path(sess); let load_result = load_data(report_incremental_info, &work_products_path, nightly_build); - if let LoadResult::Ok { data: file } = load_result { + if let LoadResult::Ok { data: mut work_product_decoder } = load_result { // Decode the list of work_products - let mut work_product_decoder = FileDecoder::new(file); let work_products: Vec = RustcDecodable::decode(&mut work_product_decoder).unwrap_or_else(|e| { let msg = format!( @@ -165,8 +162,7 @@ pub fn load_dep_graph(sess: &Session) -> DepGraphFuture { match load_data(report_incremental_info, &path, nightly_build) { LoadResult::DataOutOfDate => LoadResult::DataOutOfDate, LoadResult::Error { message } => LoadResult::Error { message }, - LoadResult::Ok { data: file } => { - let mut decoder = FileDecoder::new(file); + LoadResult::Ok { data: mut decoder } => { let prev_commandline_args_hash = u64::decode(&mut decoder) .expect("Error reading commandline arg hash from cached dep-graph"); @@ -213,11 +209,8 @@ pub fn load_query_result_cache<'a>( &query_cache_path(sess), sess.is_nightly_build(), ) { - LoadResult::Ok { data: mut file } => { - let start_pos = file.seek(io::SeekFrom::Current(0)).unwrap() as usize; - file.seek(io::SeekFrom::Start(0)).unwrap(); - let mut bytes = Vec::new(); - file.read_to_end(&mut bytes).unwrap(); + LoadResult::Ok { data: file } => { + let (bytes, start_pos) = file.read_all().unwrap(); Some(OnDiskCache::new(sess, bytes, start_pos, definitions)) } _ => Some(OnDiskCache::new_empty(sess.source_map())), diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 740c3dfac8dd5..79d3e06b4e88b 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -3,7 +3,7 @@ use crate::serialize::{self, Decoder as _, Encoder as _}; use std::borrow::Cow; use std::convert::TryInto; use std::fs::File; -use std::io::{self, BufReader, Read, Seek, SeekFrom, Write}; +use std::io::{self, Read, Seek, SeekFrom, Write}; use std::mem::MaybeUninit; use std::path::Path; use std::ptr; @@ -683,22 +683,18 @@ impl<'a> serialize::Decoder for Decoder<'a> { pub struct FileDecoder { file: File, - buf: Box<[u8]>, + buf: Vec, pos: usize, cap: usize, } impl FileDecoder { #[inline] - pub fn new(file: BufReader) -> Self { + pub fn new(file: File) -> Self { const CAP: usize = 8 * 1024; let mut buf = Vec::with_capacity(CAP); buf.resize(CAP, 0u8); - let old_buf = file.buffer(); - let len = old_buf.len(); - buf[..len].copy_from_slice(old_buf); - let file = file.into_inner(); - FileDecoder { file, buf: buf.into(), pos: 0, cap: len } + FileDecoder { file, buf, pos: 0, cap: 0 } } #[inline] @@ -708,14 +704,21 @@ impl FileDecoder { } #[inline] - pub fn read_all(self) -> Result<(Box<[u8]>, usize), io::Error> { - let mut file = self.file; - let start_pos = file.seek(SeekFrom::Current(0))?; - let start_pos = start_pos.try_into().unwrap(); - file.seek(SeekFrom::Start(0))?; - let mut bytes = Vec::new(); - file.read_to_end(&mut bytes)?; - Ok((bytes.into(), start_pos)) + pub fn read_all(self) -> Result<(Vec, usize), io::Error> { + let FileDecoder { mut file, mut buf, cap, pos } = self; + let file_pos = file.seek(SeekFrom::Current(0))?; + let file_pos: usize = file_pos.try_into().unwrap(); + if file_pos == cap { + // We still have the beginning of the file on-buffer. + // Avoid dropping it and re-reading it. + buf.resize(cap, 0u8); + file.read_to_end(&mut buf)?; + } else { + file.seek(SeekFrom::Start(0))?; + buf.clear(); + file.read_to_end(&mut buf)?; + } + Ok((buf, file_pos - cap + pos)) } #[inline] @@ -732,7 +735,7 @@ impl FileDecoder { } } - fn read_exact(&mut self, mut out: &mut [u8]) -> Result<(), io::Error> { + pub fn read_exact(&mut self, mut out: &mut [u8]) -> Result<(), io::Error> { loop { let len = out.len(); if len == 0 {