Skip to content

Commit d0fa6ea

Browse files
authored
(c2rust-analyze) Support ptr-to-ptr casts between safely transmutable types, for now limited to same-sized integers (#839)
- Fixes #840. This introduces the concept of equivalent/compatible/safely transmutable types: https://github.com/immunant/c2rust/blob/2915b8d0c71add21dee1f9d540958ea863478212/c2rust-analyze/src/util.rs#L356-L380 With it, we can now allow ptr-to-ptr casts between safely transmutable pointee types, whereas previously they were only allowed for equal types. In particular, this enables support for string casts, which are produced by `c2rust transpile` as `b"" as *const u8 as *const core::ffi::c_char`, where `c_char = i8`; this is what fixes #840. New tests are added in `string_casts.rs` to cover various ptr casts, though some of them crash in the rewriter because they contain implicitly inserted MIR statements such as implicit `&raw`s (the explicit equivalent of which is written with `addr_of!`). Thus, for some of these (where it works), there are versions with explicit `addr_of!`s that succeed end-to-end.
2 parents 6f1aeb8 + 31baf0a commit d0fa6ea

File tree

4 files changed

+166
-39
lines changed

4 files changed

+166
-39
lines changed

c2rust-analyze/src/dataflow/type_check.rs

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ use super::DataflowConstraints;
22
use crate::c_void_casts::CVoidCastDirection;
33
use crate::context::{AnalysisCtxt, LTy, PermissionSet, PointerId};
44
use crate::panic_detail;
5-
use crate::util::{describe_rvalue, is_null_const, ty_callee, Callee, RvalueDesc};
5+
use crate::util::{
6+
describe_rvalue, is_null_const, is_transmutable_ptr_cast, ty_callee, Callee, RvalueDesc,
7+
};
68
use assert_matches::assert_matches;
79
use rustc_hir::def_id::DefId;
810
use rustc_middle::mir::{
@@ -43,11 +45,6 @@ struct TypeChecker<'tcx, 'a> {
4345
equiv_constraints: Vec<(PointerId, PointerId)>,
4446
}
4547

46-
fn is_castable_to<'tcx>(_from_lty: LTy<'tcx>, _to_lty: LTy<'tcx>) -> bool {
47-
// TODO: implement
48-
true
49-
}
50-
5148
impl<'tcx> TypeChecker<'tcx, '_> {
5249
fn add_edge(&mut self, src: PointerId, dest: PointerId) {
5350
// Copying `src` to `dest` can discard permissions, but can't add new ones.
@@ -105,6 +102,7 @@ impl<'tcx> TypeChecker<'tcx, '_> {
105102
fn visit_cast(&mut self, cast_kind: CastKind, op: &Operand<'tcx>, to_lty: LTy<'tcx>) {
106103
let to_ty = to_lty.ty;
107104
let from_lty = self.acx.type_of(op);
105+
let from_ty = from_lty.ty;
108106

109107
match cast_kind {
110108
CastKind::PointerFromExposedAddress => {
@@ -114,31 +112,40 @@ impl<'tcx> TypeChecker<'tcx, '_> {
114112
panic!("Creating non-null pointers from exposed addresses not supported");
115113
}
116114
}
117-
CastKind::Pointer(PointerCast::Unsize) => {
118-
let pointee_to_ty = to_ty
119-
.builtin_deref(true)
120-
.unwrap_or_else(|| panic!("unsize cast has non-pointer output {:?}?", to_ty))
121-
.ty;
122-
123-
assert_matches!(from_lty.kind(), TyKind::Ref(..) | TyKind::RawPtr(..));
124-
let pointee_from_lty = assert_matches!(from_lty.args, [lty] => lty);
125-
126-
let elem_to_ty = assert_matches!(pointee_to_ty.kind(), &TyKind::Slice(ty) => ty);
127-
assert!(matches!(pointee_from_lty.kind(), TyKind::Array(..)));
128-
let elem_from_lty = assert_matches!(pointee_from_lty.args, [lty] => lty);
129-
assert_eq!(elem_from_lty.ty, elem_to_ty);
130-
assert_eq!(pointee_from_lty.label, PointerId::NONE);
131-
self.do_assign_pointer_ids(to_lty.label, from_lty.label);
115+
CastKind::PointerExposeAddress => {
116+
// Allow, as [`CastKind::PointerFromExposedAddress`] is the dangerous one,
117+
// and we'll catch (not allow) that above.
118+
// The result is no longer a pointer, so we don't need to add any dataflow constraints
119+
// (until we try to handle [`CastKind::PointerFromExposedAddress`], if we do).
132120
}
133-
CastKind::Pointer(..) => {
134-
// The source and target types are both pointers, and they have identical pointee types.
135-
// TODO: remove or move check to `is_castable_to`
136-
assert!(from_lty.args[0].ty == to_lty.args[0].ty);
137-
assert!(is_castable_to(from_lty, to_lty));
121+
CastKind::Pointer(ptr_cast) => {
122+
// All of these [`PointerCast`]s are type checked by rustc already.
123+
// They don't involve arbitrary raw ptr to raw ptr casts
124+
// ([`PointerCast::MutToConstPointer`] doesn't allow changing types),
125+
// which we need to check for safe transmutability,
126+
// and which are (currently) covered in [`CastKind::Misc`].
127+
// That's why there's a `match` here that does nothing;
128+
// it ensures if [`PointerCast`] is changed in a future `rustc` version,
129+
// this won't compile until we've checked that this reasoning is still accurate.
130+
match ptr_cast {
131+
PointerCast::ReifyFnPointer => {}
132+
PointerCast::UnsafeFnPointer => {}
133+
PointerCast::ClosureFnPointer(_) => {}
134+
PointerCast::MutToConstPointer => {}
135+
PointerCast::ArrayToPointer => {}
136+
PointerCast::Unsize => {}
137+
}
138+
self.do_assign_pointer_ids(to_lty.label, from_lty.label)
139+
// TODO add other dataflow constraints
138140
}
139-
_ => {
140-
// A cast such as `T as U`
141-
assert!(is_castable_to(from_lty, to_lty));
141+
CastKind::Misc => {
142+
match is_transmutable_ptr_cast(from_ty, to_ty) {
143+
Some(true) => {
144+
// TODO add other dataflow constraints
145+
},
146+
Some(false) => ::log::error!("TODO: unsupported ptr-to-ptr cast between pointee types not yet supported as safely transmutable: `{from_ty:?} as {to_ty:?}`"),
147+
None => {}, // not a ptr cast (no dataflow constraints needed); let rustc typeck this
148+
};
142149
}
143150
}
144151

@@ -295,12 +302,21 @@ impl<'tcx> TypeChecker<'tcx, '_> {
295302
}
296303
}
297304

298-
/// Unify corresponding `PointerId`s in `lty1` and `lty2`.
305+
/// Unify corresponding [`PointerId`]s in `lty1` and `lty2`.
306+
///
307+
/// The two inputs must have identical underlying types.
308+
/// For any position where the underlying type has a pointer,
309+
/// this function unifies the [`PointerId`]s that `lty1` and `lty2` have at that position.
310+
/// For example, given
311+
///
312+
/// ```
313+
/// # fn(
314+
/// lty1: *mut /*l1*/ *const /*l2*/ u8,
315+
/// lty2: *mut /*l3*/ *const /*l4*/ u8,
316+
/// # ) {}
317+
/// ```
299318
///
300-
/// The two inputs must have identical underlying types. For any position where the underlying
301-
/// type has a pointer, this function unifies the `PointerId`s that `lty1` and `lty2` have at
302-
/// that position. For example, given `lty1 = *mut /*l1*/ *const /*l2*/ u8` and `lty2 = *mut
303-
/// /*l3*/ *const /*l4*/ u8`, this function will unify `l1` with `l3` and `l2` with `l4`.
319+
/// this function will unify `l1` with `l3` and `l2` with `l4`.
304320
fn do_unify(&mut self, lty1: LTy<'tcx>, lty2: LTy<'tcx>) {
305321
assert_eq!(
306322
self.acx.tcx().erase_regions(lty1.ty),

c2rust-analyze/src/util.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use rustc_middle::ty::{
1212
self, AdtDef, DefIdTree, EarlyBinder, Subst, SubstsRef, Ty, TyCtxt, TyKind, UintTy,
1313
};
1414
use rustc_span::symbol::Symbol;
15+
use rustc_type_ir::IntTy;
1516
use std::fmt::Debug;
1617

1718
#[derive(Debug)]
@@ -356,6 +357,90 @@ pub fn is_null_const(constant: Constant) -> bool {
356357
pub trait PhantomLifetime<'a> {}
357358
impl<'a, T: ?Sized> PhantomLifetime<'a> for T {}
358359

360+
/// Determine if `from` can be safely transmuted to `to`,
361+
/// which is defined as `*(from as *const To)` being a safe operation,
362+
/// where `from: *const From` and assuming `*from` already was safe.
363+
///
364+
/// Note that this is one-way, and is slightly different from [`core::mem::transmute`],
365+
/// and more similar to [`core::mem::transmute_copy`].
366+
///
367+
/// This forms a reflexive, transitive, and non-symmetric (one-way) relation, named `~` below.
368+
/// Formally, `A ~ B` iff whenever `*a` is well-defined (i.e., not UB),
369+
/// `*(a as *const B)` is also well-defined, where `a: *const A`.
370+
///
371+
/// However, safe transmutability is difficult to check completely,
372+
/// so this function only checks a subset of it,
373+
/// with these formal rules for all types `A`, `B`:
374+
///
375+
/// 1. `A = B => A ~ B`
376+
/// 2. `A ~ B => *A ~ *B`
377+
/// 3. `uN ~ iN`, `iN ~ uN`, where `N` is an integer width
378+
/// 4. `A ~ B, N > 0 => [A; N] ~ B`, where `const N: usize`
379+
///
380+
/// Note: 5. `A ~ B => [A] ~ B` is not a rule because it would be unsound for zero-length slices,
381+
/// which we cannot check unlike for arrays, which we need for translated string literals.
382+
///
383+
/// Thus, [`true`] means it is definitely transmutable,
384+
/// while [`false`] means it may not be transmutable.
385+
///
386+
/// Also note that for `A ~ B`, we need at least
387+
/// * `size_of::<A>() >= size_of::<B>()`
388+
/// * `align_of::<A>() >= align_of::<B>()`
389+
///
390+
/// For rules 1 and 3, this obviously holds.
391+
/// For rule 2, this holds as long as
392+
/// `A ~ B` implies that (`*B` is a fat ptr implies `*A` is a fat ptr).
393+
///
394+
/// That fat-ptr implication holds for every rule: for rule 1, this holds trivially.
395+
/// For rule 2, this holds because `**A` and `**B` are always thin ptrs.
396+
/// For rule 3, this holds trivially.
397+
/// For rule 4, this holds because if `*A` is a fat ptr,
398+
/// `A` is unsized, and thus `[A; N]` is ill-formed to begin with.
399+
/// For (almost) rule 5, this holds because `*[A]` is always a fat ptr.
400+
pub fn is_transmutable_to<'tcx>(from: Ty<'tcx>, to: Ty<'tcx>) -> bool {
401+
let transmutable_ints = || {
402+
use IntTy::*;
403+
use UintTy::*;
404+
match (from.kind(), to.kind()) {
405+
(ty::Uint(u), ty::Int(i)) | (ty::Int(i), ty::Uint(u)) => {
406+
matches!(
407+
(u, i),
408+
(Usize, Isize) | (U8, I8) | (U16, I16) | (U32, I32) | (U64, I64)
409+
)
410+
}
411+
_ => false,
412+
}
413+
};
414+
415+
let one_way_transmutable = || match *from.kind() {
416+
ty::Array(from, n) => {
417+
is_transmutable_to(from, to) && {
418+
let is_zero = n.kind().try_to_scalar_int().unwrap().is_null();
419+
!is_zero
420+
}
421+
}
422+
_ => false,
423+
};
424+
425+
from == to
426+
|| is_transmutable_ptr_cast(from, to).unwrap_or(false)
427+
|| transmutable_ints()
428+
|| one_way_transmutable()
429+
}
430+
431+
/// Determine if `from as to` is a ptr-to-ptr cast,
432+
/// and if it is, if the pointee types are [safely transmutable](is_transmutable_to).
433+
///
434+
/// This returns [`Some`]`(is_transmutable)` if they're both pointers,
435+
/// and [`None`] if either of them is some other type.
436+
///
437+
/// See [`is_transmutable_to`] for the definition of safe transmutability.
438+
pub fn is_transmutable_ptr_cast<'tcx>(from: Ty<'tcx>, to: Ty<'tcx>) -> Option<bool> {
439+
let from = from.builtin_deref(true)?.ty;
440+
let to = to.builtin_deref(true)?.ty;
441+
Some(is_transmutable_to(from, to))
442+
}
443+
359444
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
360445
pub enum TestAttr {
361446
/// `#[c2rust_analyze_test::fixed_signature]`: Mark all pointers in the function signature as
Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,35 @@
1-
#[cfg(any())]
2-
pub fn cast_only(s: *const u8) {
1+
pub fn cast_ptr_to_ptr(s: *const u8) {
32
s as *const core::ffi::c_char;
43
}
54

5+
pub fn deep_cast_ptr_to_ptr(x: *const *const u8) {
6+
x as *const *const i8;
7+
}
8+
9+
/// The disabled (`#[cfg(any())]`ed) tests below currently crash in the rewriter
10+
/// due to it not being able to handle implicitly inserted `&raw` MIR statements yet.
11+
/// Thus, they also have `*_explicit` versions where
12+
/// a `std::ptr::addr_of!` is used to make the `&raw` explicit.
13+
///
14+
/// Also note that `addr_of!` (with a `use std::ptr::addr_of`)
15+
/// and `::core::ptr::addr_of!` don't work either,
16+
/// though `std::ptr::addr_of!`, `::std::ptr::addr_of!`,
17+
/// and `core::ptr::addr_of!` do work.
18+
19+
#[cfg(any())]
20+
pub fn cast_array_to_ptr(s: &[u8; 1]) {
21+
s as *const u8;
22+
}
23+
24+
pub fn cast_array_to_ptr_explicit(s: &[u8; 1]) {
25+
std::ptr::addr_of!(*s) as *const u8;
26+
}
27+
628
#[cfg(any())]
729
pub fn cast_from_literal() {
8-
b"" as *const u8 as *const core::ffi::c_char;
30+
b"\0" as *const u8 as *const core::ffi::c_char;
31+
}
32+
33+
pub fn cast_from_literal_explicit() {
34+
std::ptr::addr_of!(*b"\0") as *const u8 as *const core::ffi::c_char;
935
}

c2rust-analyze/tests/filecheck/addr_of.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ fn shared_ref_with_struct() {
2525
let y = std::ptr::addr_of!(x.a);
2626
}
2727

28-
// CHECK-LABEL: fn cast_array_to_ptr_explicit(s: &[u8; 0]) {
29-
pub fn cast_array_to_ptr_explicit(s: &[u8; 0]) {
28+
// CHECK-LABEL: fn cast_array_to_ptr_explicit(s: &[u8; 1]) {
29+
pub fn cast_array_to_ptr_explicit(s: &[u8; 1]) {
3030
// CHECK-DAG: &*(std::ptr::addr_of!(*s)) as *const u8
3131
std::ptr::addr_of!(*s) as *const u8;
3232
}

0 commit comments

Comments
 (0)