+ // FIXME(strict_provenance_magic): this module still uses lots of casts to polyfill things.
+ #![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))]
+
use super::*;
use crate::cmp::Ordering::{self, Equal, Greater, Less};
use crate::intrinsics;
@@ -60,44 +63,37 @@ impl<T: ?Sized> *const T {
/// Casts a pointer to its raw bits.
///
- /// This is equivalent to `as usize`, but is more specific to enhance readability.
- /// The inverse method is [`from_bits`](#method.from_bits).
- ///
- /// In particular, `*p as usize` and `p as usize` will both compile for
- /// pointers to numeric types but do very different things, so using this
- /// helps emphasize that reading the bits was intentional.
- ///
- /// # Examples
+ /// In general, pointers cannot be understood as "just an integer"
+ /// and cannot be created from one without additional context.
///
- /// ```
- /// #![feature(ptr_to_from_bits)]
- /// let array = [13, 42];
- /// let p0: *const i32 = &array[0];
- /// assert_eq!(<*const _>::from_bits(p0.to_bits()), p0);
- /// let p1: *const i32 = &array[1];
- /// assert_eq!(p1.to_bits() - p0.to_bits(), 4);
- /// ```
+ /// If you would like to treat a pointer like an integer anyway,
+ /// see [`addr`][] and [`with_addr`][] for the responsible way to do that.
#[unstable(feature = "ptr_to_from_bits", issue = "91126")]
- pub fn to_bits(self) -> usize
+ pub fn to_bits(self) -> [u8; core::mem::size_of::<*const ()>()]
where
T: Sized,
{
- self as usize
+ unsafe { core::mem::transmute(self) }
}

/// Creates a pointer from its raw bits.
///
/// This is equivalent to `as *const T`, but is more specific to enhance readability.
- /// The inverse method is [`to_bits`](#method.to_bits).
+ /// The inverse method is [`to_bits`](#method.to_bits-1).
///
/// # Examples
///
/// ```
/// #![feature(ptr_to_from_bits)]
/// use std::ptr::NonNull;
- /// let dangling: *const u8 = NonNull::dangling().as_ptr();
- /// assert_eq!(<*const u8>::from_bits(1), dangling);
+ /// let dangling: *mut u8 = NonNull::dangling().as_ptr();
+ /// assert_eq!(<*mut u8>::from_bits(1), dangling);
/// ```
+ #[rustc_deprecated(
+     since = "1.61.0",
+     reason = "This design is incompatible with Pointer Provenance",
+     suggestion = "from_addr"
+ )]
#[unstable(feature = "ptr_to_from_bits", issue = "91126")]
pub fn from_bits(bits: usize) -> Self
where
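For readers following along, here is a minimal sketch of the migration this deprecation points toward: instead of round-tripping a pointer through `usize`, read the address with `addr` and derive any new pointer from an existing one with `with_addr`. This is an illustration, not part of the patch, and it assumes a nightly toolchain with the `strict_provenance` feature gate named later in this diff.

```rust
#![feature(strict_provenance)]

fn main() {
    let array = [13i32, 42];
    let p0: *const i32 = array.as_ptr();

    // Reading the address is still fine, but it is explicitly *just* an
    // address now, not something a pointer can be conjured back from.
    let addr = p0.addr();

    // To point at `array[1]`, derive the new pointer from `p0` so that its
    // provenance (permission to access `array`) carries over.
    let p1 = p0.with_addr(addr + core::mem::size_of::<i32>());
    assert_eq!(unsafe { *p1 }, 42);
}
```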
@@ -106,6 +102,87 @@ impl<T: ?Sized> *const T {
bits as Self
}

+ /// Gets the "address" portion of the pointer.
+ ///
+ /// On most platforms this is a no-op, as the pointer is just an address,
+ /// and is equivalent to the deprecated `ptr as usize` cast.
+ ///
+ /// On more complicated platforms like CHERI and segmented architectures,
+ /// this may remove some important metadata. See [`with_addr`][] for
+ /// details on this distinction and why it's important.
+ #[unstable(feature = "strict_provenance", issue = "99999999")]
+ pub fn addr(self) -> usize
+ where
+ T: Sized,
+ {
+ // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+ self as usize
+ }
+
+ /// Creates a new pointer with the given address.
+ ///
+ /// See also: [`ptr::fake_alloc`][] and [`ptr::zst_exists`][].
+ ///
+ /// This replaces the deprecated `usize as ptr` cast, which had
+ /// fundamentally broken semantics because it couldn't restore
+ /// *segment* and *provenance*.
+ ///
+ /// A pointer semantically has 3 pieces of information associated with it:
+ ///
+ /// * Segment: The address-space it is part of.
+ /// * Provenance: An allocation (slice) that it is allowed to access.
+ /// * Address: The actual address it points at.
+ ///
+ /// The compiler and hardware need to properly understand all 3 of these
+ /// values at all times to properly execute your code.
+ ///
+ /// Segment and Provenance are implicitly defined by *how* a pointer is
+ /// constructed and generally propagate verbatim to all derived pointers.
+ /// It is therefore *impossible* to convert an address into a pointer
+ /// on its own, because there is no way to know what its segment and
+ /// provenance should be.
+ ///
+ /// By introducing a "representative" pointer into the process we can
+ /// properly construct a new pointer with *its* segment and provenance,
+ /// just as any other derived pointer would. This *should* be equivalent
+ /// to `wrapping_offset`ting the given pointer to the new address. See the
+ /// docs for `wrapping_offset` for the restrictions this implies.
+ ///
+ /// # Example
+ ///
+ /// Here is an example of how to properly use this API to mess around
+ /// with tagged pointers. Here we have a tag in the lowest bit:
+ ///
+ /// ```ignore
+ /// let my_tagged_ptr: *const T = ...;
+ ///
+ /// // Get the address and do whatever bit tricks we like
+ /// let addr = my_tagged_ptr.addr();
+ /// let has_tag = (addr & 0x1) != 0;
+ /// let real_addr = addr & !0x1;
+ ///
+ /// // Reconstitute a pointer with the new address and use it
+ /// let my_untagged_ptr = my_tagged_ptr.with_addr(real_addr);
+ /// let val = *my_untagged_ptr;
+ /// ```
+ #[unstable(feature = "strict_provenance", issue = "99999999")]
+ pub fn with_addr(self, addr: usize) -> Self
+ where
+ T: Sized,
+ {
+ // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+ //
+ // In the meantime, this operation is defined to be "as if" it were
+ // a wrapping_offset, so we can emulate it as such. This should properly
+ // restore pointer provenance even under today's compiler.
+ let self_addr = self.addr() as isize;
+ let dest_addr = addr as isize;
+ let offset = dest_addr.wrapping_sub(self_addr);
+
+ // This is the canonical desugaring of this operation
+ self.cast::<u8>().wrapping_offset(offset).cast::<T>()
+ }
+
/// Decompose a (possibly wide) pointer into its address and metadata components.
///
/// The pointer can be later reconstructed with [`from_raw_parts`].
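As a runnable companion to the `ignore`d tagged-pointer snippet in the docs above, the following sketch tags and untags a pointer with `addr`/`with_addr`. It is hypothetical in the sense that both methods are unstable at this point and require a nightly toolchain with the `strict_provenance` feature gate.

```rust
#![feature(strict_provenance)]

fn main() {
    let value: u32 = 7;
    let ptr: *const u32 = &value;

    // `u32` is 4-byte aligned, so the lowest address bit is free for a tag.
    assert_eq!(ptr.addr() & 0x1, 0);
    let tagged = ptr.with_addr(ptr.addr() | 0x1);

    // Strip the tag with `with_addr` so the result keeps `ptr`'s provenance.
    let has_tag = (tagged.addr() & 0x1) != 0;
    let untagged = tagged.with_addr(tagged.addr() & !0x1);

    assert!(has_tag);
    assert_eq!(unsafe { *untagged }, 7);
}
```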
@@ -305,10 +382,10 @@ impl<T: ?Sized> *const T {
/// This operation itself is always safe, but using the resulting pointer is not.
///
/// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not
- /// be used to read or write other allocated objects.
+ /// be used to read or write other allocated objects. This is tracked by provenance.
///
- /// In other words, `let z = x.wrapping_offset((y as isize) - (x as isize))` does *not* make `z`
- /// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still
+ /// In other words, `let z = x.wrapping_offset((y.addr() as isize) - (x.addr() as isize))`
+ /// does *not* make `z` the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still
/// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless
/// `x` and `y` point into the same allocated object.
///
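A small sketch of the rule spelled out in that hunk (again assuming the unstable `addr` method on nightly): steering a pointer to another object's address does not transfer access to that object, while offsets that stay inside the original object remain usable.

```rust
#![feature(strict_provenance)]

fn main() {
    let a = [1u8, 2, 3];
    let b = [9u8];
    let x: *const u8 = a.as_ptr();
    let y: *const u8 = b.as_ptr();

    // `z` has `y`'s address but is still attached to `a`, the object `x` was
    // derived from, so reading `b` through it would be Undefined Behavior.
    let z = x.wrapping_offset((y.addr() as isize).wrapping_sub(x.addr() as isize));
    assert_eq!(z.addr(), y.addr());

    // Offsets that stay inside `a` are fine to dereference.
    let x2 = x.wrapping_offset(2);
    assert_eq!(unsafe { *x2 }, 3);
}
```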
@@ -320,8 +397,39 @@ impl<T: ?Sized> *const T {
///
/// The delayed check only considers the value of the pointer that was dereferenced, not the
/// intermediate values used during the computation of the final result. For example,
- /// `x.wrapping_offset(o).wrapping_offset(o.wrapping_neg())` is always the same as `x`. In other
- /// words, leaving the allocated object and then re-entering it later is permitted.
+ /// `x.wrapping_offset(o).wrapping_offset(o.wrapping_neg())` is always the same as `x`...
+ ///
+ /// Usually.
+ ///
+ /// More work needs to be done to define the rules here, but on CHERI it is not *actually*
+ /// a no-op to wrapping_offset a pointer to some random address and back again. For practical
+ /// applications that actually need this, it *will* generally work, but if your offset is
+ /// "too out of bounds" the system will mark your pointer as invalid, and subsequent reads
+ /// will fault *as if* the pointer had been corrupted by a non-pointer instruction.
+ ///
+ /// CHERI has a roughly 64-bit address space but its 128-bit pointers contain
+ /// 3 ostensibly-address-space-sized values:
+ ///
+ /// * 2 values for the "slice" that the pointer can access.
+ /// * 1 value for the actual address it points to.
+ ///
+ /// To accomplish this, CHERI compresses the values and even requires large allocations
+ /// to have higher alignment to free up extra bits. This compression scheme can support
+ /// the pointer being offset outside of the slice, but only to an extent. A *generous*
+ /// extent, but a limited one nonetheless. To quote CHERI's documentation:
+ ///
+ /// > With 27 bits of the capability used for bounds, CHERI-MIPS and 64-bit
+ /// > CHERI-RISC-V provide the following guarantees:
+ /// >
+ /// > * A pointer is able to travel at least 1⁄4 the size of the object, or 2 KiB,
+ /// >   whichever is greater, above its upper bound.
+ /// > * It is able to travel at least 1⁄8 the size of the object, or 1 KiB,
+ /// >   whichever is greater, below its lower bound.
+ ///
+ /// Needless to say, any scheme that relies on reusing the least significant bits
+ /// of a pointer based on alignment is going to be fine. Any scheme which tries
+ /// to set *high* bits isn't going to work, but that was *already* extremely
+ /// platform-specific and not at all portable.
///
/// [`offset`]: #method.offset
/// [allocated object]: crate::ptr#allocated-object
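To illustrate the "delayed check" described in that hunk on a mainstream (non-CHERI) platform, here is a small sketch of the leave-and-re-enter round trip; on CHERI the same code is only guaranteed to behave this way while the detour stays within the representable bounds quoted above.

```rust
fn main() {
    let data = [10u8, 20, 30];
    let x: *const u8 = data.as_ptr();

    // Wander far out of bounds and come straight back; only the final value
    // is checked when the pointer is eventually dereferenced.
    let o: isize = 2_000;
    let round_trip = x.wrapping_offset(o).wrapping_offset(o.wrapping_neg());

    assert_eq!(round_trip, x);
    assert_eq!(unsafe { *round_trip }, 10);
}
```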
@@ -427,10 +535,10 @@ impl<T: ?Sized> *const T {
/// ```rust,no_run
/// let ptr1 = Box::into_raw(Box::new(0u8)) as *const u8;
/// let ptr2 = Box::into_raw(Box::new(1u8)) as *const u8;
- /// let diff = (ptr2 as isize).wrapping_sub(ptr1 as isize);
+ /// let diff = (ptr2.addr() as isize).wrapping_sub(ptr1.addr() as isize);
/// // Make ptr2_other an "alias" of ptr2, but derived from ptr1.
/// let ptr2_other = (ptr1 as *const u8).wrapping_offset(diff);
- /// assert_eq!(ptr2 as usize, ptr2_other as usize);
+ /// assert_eq!(ptr2.addr(), ptr2_other.addr());
/// // Since ptr2_other and ptr2 are derived from pointers to different objects,
/// // computing their offset is undefined behavior, even though
/// // they point to the same address!
@@ -653,7 +761,7 @@ impl<T: ?Sized> *const T {
/// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not
/// be used to read or write other allocated objects.
///
- /// In other words, `let z = x.wrapping_add((y as usize) - (x as usize))` does *not* make `z`
+ /// In other words, `let z = x.wrapping_add((y.addr()) - (x.addr()))` does *not* make `z`
/// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still
/// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless
/// `x` and `y` point into the same allocated object.
@@ -715,7 +823,7 @@ impl<T: ?Sized> *const T {
/// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not
/// be used to read or write other allocated objects.
///
- /// In other words, `let z = x.wrapping_sub((x as usize) - (y as usize))` does *not* make `z`
+ /// In other words, `let z = x.wrapping_sub((x.addr()) - (y.addr()))` does *not* make `z`
/// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still
/// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless
/// `x` and `y` point into the same allocated object.
@@ -1003,7 +1111,7 @@ impl<T> *const [T] {
/// use std::ptr;
///
/// let slice: *const [i8] = ptr::slice_from_raw_parts(ptr::null(), 3);
- /// assert_eq!(slice.as_ptr(), 0 as *const i8);
+ /// assert_eq!(slice.as_ptr(), ptr::null());
/// ```
#[inline]
#[unstable(feature = "slice_ptr_get", issue = "74265")]