|
| 1 | +//! This is a densely packed error representation which is used on targets with |
| 2 | +//! 64-bit pointers. |
| 3 | +//! |
| 4 | +//! (Note that `bitpacked` vs `unpacked` here has no relationship to |
| 5 | +//! `#[repr(packed)]`, it just refers to attempting to use any available |
| 6 | +//! bits in a more clever manner than `rustc`'s default layout algorithm would). |
| 7 | +//! |
| 8 | +//! Conceptually, it stores the same information as the "unpacked" equivalent we |
| 9 | +//! use on other targets: `repr_unpacked::Repr` (see repr_unpacked.rs), however |
| 10 | +//! it packs it into a 64bit non-zero value. |
| 11 | +//! |
| 12 | +//! This optimization not only allows `io::Error` to occupy a single pointer, |
| 13 | +//! but improves `io::Result` as well, especially for situations like |
| 14 | +//! `Result<()>` (which is now 64 bits) or `Result<u64>` (which is now 128 bits), which are |
| 15 | +//! quite common. |
| 16 | +//! |
| 17 | +//! # Layout |
| 18 | +//! Tagged values are 64 bits, with the 2 least significant bits used for the |
| 19 | +//! tag. This means there are 4 "variants": |
| 20 | +//! |
| 21 | +//! - **Tag 0b00**: The first variant is equivalent to |
| 22 | +//! `ErrorData::SimpleMessage`, and holds a `&'static SimpleMessage` directly. |
| 23 | +//! |
| 24 | +//! `SimpleMessage` has an alignment >= 4 (which is requested with |
| 25 | +//! `#[repr(align)]` and checked statically at the bottom of this file), which |
| 26 | +//! means every `&'static SimpleMessage` should have both tag bits as 0, |
| 27 | +//! meaning its tagged and untagged representation are equivalent. |
| 28 | +//! |
| 29 | +//! This means we can skip tagging it, which is necessary as this variant can |
| 30 | +//! be constructed from a `const fn`, which probably cannot tag pointers (or |
| 31 | +//! at least it would be difficult). |
| 32 | +//! |
| 33 | +//! - **Tag 0b01**: The other pointer variant holds the data for |
| 34 | +//! `ErrorData::Custom` and the remaining 62 bits are used to store a |
| 35 | +//! `Box<Custom>`. `Custom` also has alignment >= 4, so the bottom two bits |
| 36 | +//! are free to use for the tag. |
| 37 | +//! |
| 38 | +//! The only important thing to note is that `ptr::add` and `ptr::sub` are |
| 39 | +//! used to tag the pointer, rather than bitwise operations. This should |
| 40 | +//! preserve the pointer's provenance, which would otherwise be lost. |
| 41 | +//! |
| 42 | +//! - **Tag 0b10**: Holds the data for `ErrorData::Os(i32)`. We store the `i32` |
| 43 | +//! in the pointer's most significant 32 bits, and don't use the bits `2..32` |
| 44 | +//! for anything. Using the top 32 bits is just to let us easily recover the |
| 45 | +//! `i32` code with the correct sign. |
| 46 | +//! |
| 47 | +//! - **Tag 0b11**: Holds the data for `ErrorData::Simple(ErrorKind)`. This |
| 48 | +//! stores the `ErrorKind` in the top 32 bits as well, although it doesn't |
| 49 | +//! occupy nearly that many. Most of the bits are unused here, but it's not |
| 50 | +//! like we need them for anything else yet. |
| 51 | +//! |
| 52 | +//! # Use of `NonNull<()>` |
| 53 | +//! |
| 54 | +//! Everything is stored in a `NonNull<()>`, which is odd, but actually serves a |
| 55 | +//! purpose. |
| 56 | +//! |
| 57 | +//! Conceptually you might think of this more like: |
| 58 | +//! |
| 59 | +//! ```ignore |
| 60 | +//! union Repr { |
| 61 | +//! // holds integer (Simple/Os) variants, and |
| 62 | +//! // provides access to the tag bits. |
| 63 | +//! bits: NonZeroU64, |
| 64 | +//! // Tag is 0, so this is stored untagged. |
| 65 | +//! msg: &'static SimpleMessage, |
| 66 | +//! // Tagged (offset) `Box<Custom>` pointer. |
| 67 | +//! tagged_custom: NonNull<()>, |
| 68 | +//! } |
| 69 | +//! ``` |
| 70 | +//! |
| 71 | +//! But there are a few problems with this: |
| 72 | +//! |
| 73 | +//! 1. Union access is equivalent to a transmute, so this representation would |
| 74 | +//! require we transmute between integers and pointers in at least one |
| 75 | +//! direction, which may be UB (and even if not, it is likely harder for a |
| 76 | +//! compiler to reason about than explicit ptr->int operations). |
| 77 | +//! |
| 78 | +//! 2. Even if all fields of a union have a niche, the union itself doesn't, |
| 79 | +//! although this may change in the future. This would make things like |
| 80 | +//! `io::Result<()>` and `io::Result<usize>` larger, which defeats part of |
| 81 | +//! the motivation of this bitpacking. |
| 82 | +//! |
| 83 | +//! Storing everything in a `NonZeroUsize` (or some other integer) would be a |
| 84 | +//! bit more traditional for pointer tagging, but it would lose provenance |
| 85 | +//! information, couldn't be constructed from a `const fn`, and would probably |
| 86 | +//! run into other issues as well. |
| 87 | +//! |
| 88 | +//! The `NonNull<()>` seems like the only alternative, even if it's fairly odd |
| 89 | +//! to use a pointer type to store something that may hold an integer, some of |
| 90 | +//! the time. |
| 91 | +
|
| 92 | +use super::{Custom, ErrorData, ErrorKind, SimpleMessage}; |
| 93 | +use alloc::boxed::Box; |
| 94 | +use core::mem::{align_of, size_of}; |
| 95 | +use core::ptr::NonNull; |
| 96 | + |
// Tagging scheme: the two least-significant bits of the packed word select
// which `ErrorData` variant the remaining bits encode.

/// Mask that isolates the two tag bits.
const TAG_MASK: usize = 0b11;
/// Tag for `ErrorData::SimpleMessage`. It is zero, so the `&'static
/// SimpleMessage` is stored as-is, without any tagging step.
const TAG_SIMPLE_MESSAGE: usize = 0b00;
/// Tag for `ErrorData::Custom`, stored as an offset `Box<Custom>` pointer.
const TAG_CUSTOM: usize = 0b01;
/// Tag for `ErrorData::Os`; the `i32` code lives in the upper 32 bits.
const TAG_OS: usize = 0b10;
/// Tag for `ErrorData::Simple`; the `ErrorKind` lives in the upper 32 bits.
const TAG_SIMPLE: usize = 0b11;
| 103 | + |
| 104 | +#[repr(transparent)] |
| 105 | +pub(super) struct Repr(NonNull<()>); |
| 106 | + |
| 107 | +// All the types `Repr` stores internally are Send + Sync, and so is it. |
| 108 | +unsafe impl Send for Repr {} |
| 109 | +unsafe impl Sync for Repr {} |
| 110 | + |
| 111 | +impl Repr { |
| 112 | + pub(super) fn new_custom(b: Box<Custom>) -> Self { |
| 113 | + let p = Box::into_raw(b).cast::<u8>(); |
| 114 | + // Should only be possible if an allocator handed out a pointer with |
| 115 | + // wrong alignment. |
| 116 | + debug_assert_eq!((p as usize & TAG_MASK), 0); |
| 117 | + // Safety: We know `TAG_CUSTOM <= size_of::<Custom>()` (static_assert at |
| 118 | + // end of file), and both the start and end of the expression must be |
| 119 | + // valid without address space wraparound due to `Box`'s semantics. |
| 120 | + // Note: `add` is used as a provenance-preserving way of pointer tagging. |
| 121 | + let tagged = unsafe { p.add(TAG_CUSTOM).cast::<()>() }; |
| 122 | + // Safety: the above safety comment also means the result can't be null. |
| 123 | + let res = Self(unsafe { NonNull::new_unchecked(tagged) }); |
| 124 | + // quickly smoke-check we encoded the right thing (This generally will |
| 125 | + // only run in libstd's tests, unless the user uses -Zbuild-std) |
| 126 | + debug_assert!(matches!(res.data(), ErrorData::Custom(_)), "repr(custom) encoding failed"); |
| 127 | + res |
| 128 | + } |
| 129 | + |
| 130 | + #[inline] |
| 131 | + pub(super) fn new_os(code: i32) -> Self { |
| 132 | + let utagged = ((code as usize) << 32) | TAG_OS; |
| 133 | + // Safety: `TAG_OS` is not zero, so the result of the `|` is not 0. |
| 134 | + let res = Self(unsafe { NonNull::new_unchecked(utagged as *mut ()) }); |
| 135 | + // quickly smoke-check we encoded the right thing (This generally will |
| 136 | + // only run in libstd's tests, unless the user uses -Zbuild-std) |
| 137 | + debug_assert!( |
| 138 | + matches!(res.data(), ErrorData::Os(c) if c == code), |
| 139 | + "repr(os) encoding failed for {}", |
| 140 | + code, |
| 141 | + ); |
| 142 | + res |
| 143 | + } |
| 144 | + |
| 145 | + #[inline] |
| 146 | + pub(super) fn new_simple(kind: ErrorKind) -> Self { |
| 147 | + let utagged = ((kind as usize) << 32) | TAG_SIMPLE; |
| 148 | + // Safety: `TAG_SIMPLE` is not zero, so the result of the `|` is not 0. |
| 149 | + let res = Self(unsafe { NonNull::new_unchecked(utagged as *mut ()) }); |
| 150 | + // quickly smoke-check we encoded the right thing (This generally will |
| 151 | + // only run in libstd's tests, unless the user uses -Zbuild-std) |
| 152 | + debug_assert!( |
| 153 | + matches!(res.data(), ErrorData::Simple(k) if k == kind), |
| 154 | + "repr(simple) encoding failed {:?}", |
| 155 | + kind, |
| 156 | + ); |
| 157 | + res |
| 158 | + } |
| 159 | + |
| 160 | + #[inline] |
| 161 | + pub(super) const fn new_simple_message(m: &'static SimpleMessage) -> Self { |
| 162 | + // Safety: We're a Repr, decode_repr is fine. |
| 163 | + Self(unsafe { NonNull::new_unchecked(m as *const _ as *mut ()) }) |
| 164 | + } |
| 165 | + |
| 166 | + #[inline] |
| 167 | + pub(super) fn data(&self) -> ErrorData<&Custom> { |
| 168 | + // Safety: We're a Repr, decode_repr is fine. |
| 169 | + unsafe { decode_repr(self.0, |c| &*c) } |
| 170 | + } |
| 171 | + |
| 172 | + #[inline] |
| 173 | + pub(super) fn data_mut(&mut self) -> ErrorData<&mut Custom> { |
| 174 | + // Safety: We're a Repr, decode_repr is fine. |
| 175 | + unsafe { decode_repr(self.0, |c| &mut *c) } |
| 176 | + } |
| 177 | + |
| 178 | + #[inline] |
| 179 | + pub(super) fn into_data(self) -> ErrorData<Box<Custom>> { |
| 180 | + let this = core::mem::ManuallyDrop::new(self); |
| 181 | + // Safety: We're a Repr, decode_repr is fine. The `Box::from_raw` is |
| 182 | + // safe because we prevent double-drop using `ManuallyDrop`. |
| 183 | + unsafe { decode_repr(this.0, |p| Box::from_raw(p)) } |
| 184 | + } |
| 185 | +} |
| 186 | + |
| 187 | +impl Drop for Repr { |
| 188 | + #[inline] |
| 189 | + fn drop(&mut self) { |
| 190 | + // Safety: We're a Repr, decode_repr is fine. The `Box::from_raw` is |
| 191 | + // safe because we're being dropped. |
| 192 | + unsafe { |
| 193 | + let _ = decode_repr(self.0, |p| Box::<Custom>::from_raw(p)); |
| 194 | + } |
| 195 | + } |
| 196 | +} |
| 197 | + |
| 198 | +// Shared helper to decode a `Repr`'s internal pointer into an ErrorData. |
| 199 | +// |
| 200 | +// Safety: `ptr`'s bits should be encoded as described in the document at the |
| 201 | +// top (it should `some_repr.0`) |
| 202 | +#[inline] |
| 203 | +unsafe fn decode_repr<C, F>(ptr: NonNull<()>, make_custom: F) -> ErrorData<C> |
| 204 | +where |
| 205 | + F: FnOnce(*mut Custom) -> C, |
| 206 | +{ |
| 207 | + let bits = ptr.as_ptr() as usize; |
| 208 | + match bits & TAG_MASK { |
| 209 | + TAG_OS => { |
| 210 | + let code = ((bits as i64) >> 32) as i32; |
| 211 | + ErrorData::Os(code) |
| 212 | + } |
| 213 | + TAG_SIMPLE => { |
| 214 | + let kind_bits = (bits >> 32) as u32; |
| 215 | + let kind = kind_from_prim(kind_bits).unwrap_or_else(|| { |
| 216 | + debug_assert!(false, "Invalid io::error::Repr bits: `Repr({:#016x})`", bits); |
| 217 | + // This means the `ptr` passed in was not valid, which voilates |
| 218 | + // the unsafe contract of `decode_repr`. |
| 219 | + // |
| 220 | + // Using this rather than unwrap meaningfully improves the code |
| 221 | + // for callers which only care about one variant (usually |
| 222 | + // `Custom`) |
| 223 | + core::hint::unreachable_unchecked(); |
| 224 | + }); |
| 225 | + ErrorData::Simple(kind) |
| 226 | + } |
| 227 | + TAG_SIMPLE_MESSAGE => ErrorData::SimpleMessage(&*ptr.cast::<SimpleMessage>().as_ptr()), |
| 228 | + TAG_CUSTOM => { |
| 229 | + let custom = ptr.as_ptr().cast::<u8>().sub(TAG_CUSTOM).cast::<Custom>(); |
| 230 | + ErrorData::Custom(make_custom(custom)) |
| 231 | + } |
| 232 | + _ => { |
| 233 | + // Can't happen, and compiler can tell |
| 234 | + unreachable!(); |
| 235 | + } |
| 236 | + } |
| 237 | +} |
| 238 | + |
| 239 | +// This compiles to the same code as the check+transmute, but doesn't require |
| 240 | +// unsafe, or to hard-code max ErrorKind or its size in a way the compiler |
| 241 | +// couldn't verify. |
| 242 | +#[inline] |
| 243 | +fn kind_from_prim(ek: u32) -> Option<ErrorKind> { |
| 244 | + macro_rules! from_prim { |
| 245 | + ($prim:expr => $Enum:ident { $($Variant:ident),* $(,)? }) => {{ |
| 246 | + // Force a compile error if the list gets out of date. |
| 247 | + const _: fn(e: $Enum) = |e: $Enum| match e { |
| 248 | + $($Enum::$Variant => ()),* |
| 249 | + }; |
| 250 | + match $prim { |
| 251 | + $(v if v == ($Enum::$Variant as _) => Some($Enum::$Variant),)* |
| 252 | + _ => None, |
| 253 | + } |
| 254 | + }} |
| 255 | + } |
| 256 | + from_prim!(ek => ErrorKind { |
| 257 | + NotFound, |
| 258 | + PermissionDenied, |
| 259 | + ConnectionRefused, |
| 260 | + ConnectionReset, |
| 261 | + HostUnreachable, |
| 262 | + NetworkUnreachable, |
| 263 | + ConnectionAborted, |
| 264 | + NotConnected, |
| 265 | + AddrInUse, |
| 266 | + AddrNotAvailable, |
| 267 | + NetworkDown, |
| 268 | + BrokenPipe, |
| 269 | + AlreadyExists, |
| 270 | + WouldBlock, |
| 271 | + NotADirectory, |
| 272 | + IsADirectory, |
| 273 | + DirectoryNotEmpty, |
| 274 | + ReadOnlyFilesystem, |
| 275 | + FilesystemLoop, |
| 276 | + StaleNetworkFileHandle, |
| 277 | + InvalidInput, |
| 278 | + InvalidData, |
| 279 | + TimedOut, |
| 280 | + WriteZero, |
| 281 | + StorageFull, |
| 282 | + NotSeekable, |
| 283 | + FilesystemQuotaExceeded, |
| 284 | + FileTooLarge, |
| 285 | + ResourceBusy, |
| 286 | + ExecutableFileBusy, |
| 287 | + Deadlock, |
| 288 | + CrossesDevices, |
| 289 | + TooManyLinks, |
| 290 | + FilenameTooLong, |
| 291 | + ArgumentListTooLong, |
| 292 | + Interrupted, |
| 293 | + Other, |
| 294 | + UnexpectedEof, |
| 295 | + Unsupported, |
| 296 | + OutOfMemory, |
| 297 | + Uncategorized, |
| 298 | + }) |
| 299 | +} |
| 300 | + |
// Some static checking to alert us if a change breaks any of the assumptions
// that our encoding relies on. If any of these are hit on a platform that
// libstd supports, we should just make sure `repr_unpacked.rs` is used.
//
// `static_assert!(cond)` fails compilation when `cond` is false: the array
// length evaluates to 1, which does not match the declared `[(); 0]` type.
macro_rules! static_assert {
    ($condition:expr) => {
        const _: [(); 0] = {
            // Binding to a `bool` const first rejects non-boolean conditions.
            // Applying `!` straight to an integer would be a bitwise NOT, which
            // could let a nonsensical assertion pass silently.
            const STATIC_ASSERT_CONDITION: bool = $condition;
            [(); (!STATIC_ASSERT_CONDITION) as usize]
        };
    };
}
| 309 | + |
| 310 | +// The bitpacking we use requires pointers be exactly 64 bits. |
| 311 | +static_assert!(size_of::<NonNull<()>>() == 8); |
| 312 | + |
| 313 | +// We also require pointers and usize be the same size. |
| 314 | +static_assert!(size_of::<NonNull<()>>() == size_of::<usize>()); |
| 315 | + |
| 316 | +// `Custom` and `SimpleMessage` need to be thin pointers. |
| 317 | +static_assert!(size_of::<&'static SimpleMessage>() == 8); |
| 318 | +static_assert!(size_of::<Box<Custom>>() == 8); |
| 319 | + |
| 320 | +// And they must have >= 4 byte alignment. |
| 321 | +static_assert!(align_of::<SimpleMessage>() >= 4); |
| 322 | +static_assert!(align_of::<Custom>() >= 4); |
| 323 | + |
| 324 | +// This is obviously true (`TAG_CUSTOM` is `0b01`), but our implementation of |
| 325 | +// `Repr::new_custom` and such would be UB if it were not, so we check. |
| 326 | +static_assert!(size_of::<Custom>() >= TAG_CUSTOM); |
| 327 | +// These two store a payload which is allowed to be zero, so they must be |
| 328 | +// non-zero to preserve the `NonNull`'s range invariant. |
| 329 | +static_assert!(TAG_OS != 0); |
| 330 | +static_assert!(TAG_SIMPLE != 0); |
| 331 | +// We can't tag `SimpleMessage`s, the tag must be 0. |
| 332 | +static_assert!(TAG_SIMPLE_MESSAGE == 0); |
| 333 | + |
| 334 | +// Check that the point of all of this still holds. |
| 335 | +static_assert!(size_of::<Repr>() == 8); |
| 336 | +static_assert!(size_of::<Option<Repr>>() == 8); |
| 337 | +static_assert!(size_of::<Result<(), Repr>>() == 8); |
| 338 | +static_assert!(size_of::<Result<usize, Repr>>() == 16); |
0 commit comments