Skip to content

Commit 872ba2c

Browse files
committed
auto merge of #19294 : huonw/rust/transmute-inplace, r=nikomatsakis
This detects (a subset of) the cases when `transmute::<T, U>(x)` can be lowered to a direct `bitcast T x to U` in LLVM. This assists with efficiently handling a SIMD vector as multiple different types, e.g. swapping bytes/words/double words around inside some larger vector type.

C compilers like GCC and Clang handle integer vector types as `__m128i` for all widths, and implicitly insert bitcasts as required. This patch allows Rust to express this, even if it takes a bit of `unsafe`, whereas previously it was impossible to do at all without inline assembly.

Example:

    pub fn reverse_u32s(u: u64x2) -> u64x2 {
        unsafe {
            let tmp = mem::transmute::<_, u32x4>(u);
            let swapped = u32x4(tmp.3, tmp.2, tmp.1, tmp.0);
            mem::transmute::<_, u64x2>(swapped)
        }
    }

Compiling with `--opt-level=3` gives:

Before

    define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
    entry-block:
      %1 = bitcast <2 x i64> %0 to i128
      %u.0.extract.trunc = trunc i128 %1 to i32
      %u.4.extract.shift = lshr i128 %1, 32
      %u.4.extract.trunc = trunc i128 %u.4.extract.shift to i32
      %u.8.extract.shift = lshr i128 %1, 64
      %u.8.extract.trunc = trunc i128 %u.8.extract.shift to i32
      %u.12.extract.shift = lshr i128 %1, 96
      %u.12.extract.trunc = trunc i128 %u.12.extract.shift to i32
      %2 = insertelement <4 x i32> undef, i32 %u.12.extract.trunc, i64 0
      %3 = insertelement <4 x i32> %2, i32 %u.8.extract.trunc, i64 1
      %4 = insertelement <4 x i32> %3, i32 %u.4.extract.trunc, i64 2
      %5 = insertelement <4 x i32> %4, i32 %u.0.extract.trunc, i64 3
      %6 = bitcast <4 x i32> %5 to <2 x i64>
      ret <2 x i64> %6
    }

    _ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
        .cfi_startproc
        movd %xmm0, %rax
        punpckhqdq %xmm0, %xmm0
        movd %xmm0, %rcx
        movq %rcx, %rdx
        shrq $32, %rdx
        movq %rax, %rsi
        shrq $32, %rsi
        movd %eax, %xmm0
        movd %ecx, %xmm1
        punpckldq %xmm0, %xmm1
        movd %esi, %xmm2
        movd %edx, %xmm0
        punpckldq %xmm2, %xmm0
        punpckldq %xmm1, %xmm0
        retq

After

    define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
    entry-block:
      %1 = bitcast <2 x i64> %0 to <4 x i32>
      %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
      %3 = bitcast <4 x i32> %2 to <2 x i64>
      ret <2 x i64> %3
    }

    _ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
        .cfi_startproc
        pshufd $27, %xmm0, %xmm0
        retq
2 parents c38e73f + 1a62066 commit 872ba2c

File tree

1 file changed

+59
-6
lines changed

1 file changed

+59
-6
lines changed

src/librustc_trans/trans/intrinsic.rs

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#![allow(non_upper_case_globals)]
1212

1313
use llvm;
14-
use llvm::{SequentiallyConsistent, Acquire, Release, AtomicXchg, ValueRef};
14+
use llvm::{SequentiallyConsistent, Acquire, Release, AtomicXchg, ValueRef, TypeKind};
1515
use middle::subst;
1616
use middle::subst::FnSpace;
1717
use trans::base::*;
@@ -174,12 +174,65 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
174174
// This should be caught by the intrinsicck pass
175175
assert_eq!(in_type_size, out_type_size);
176176

177-
// We need to cast the dest so the types work out
178-
let dest = match dest {
179-
expr::SaveIn(d) => expr::SaveIn(PointerCast(bcx, d, llintype.ptr_to())),
180-
expr::Ignore => expr::Ignore
177+
let nonpointer_nonaggregate = |llkind: TypeKind| -> bool {
178+
use llvm::TypeKind::*;
179+
match llkind {
180+
Half | Float | Double | X86_FP80 | FP128 |
181+
PPC_FP128 | Integer | Vector | X86_MMX => true,
182+
_ => false
183+
}
184+
};
185+
186+
// An approximation to which types can be directly cast via
187+
// LLVM's bitcast. This doesn't cover pointer <-> integer casts,
188+
// but does, importantly, cover SIMD types.
189+
let in_kind = llintype.kind();
190+
let ret_kind = llret_ty.kind();
191+
let bitcast_compatible =
192+
(nonpointer_nonaggregate(in_kind) && nonpointer_nonaggregate(ret_kind)) || {
193+
in_kind == TypeKind::Pointer && ret_kind == TypeKind::Pointer
194+
};
195+
196+
let dest = if bitcast_compatible {
197+
// if we're here, the type is scalar-like (a primitive, a
198+
// SIMD type or a pointer), and so can be handled as a
199+
// by-value ValueRef and can also be directly bitcast to the
200+
// target type. Doing this special case makes conversions
201+
// like `u32x4` -> `u64x2` much nicer for LLVM and so more
202+
// efficient (these are done efficiently implicitly in C
203+
// with the `__m128i` type and so this means Rust doesn't
204+
// lose out there).
205+
let expr = &*arg_exprs[0];
206+
let datum = unpack_datum!(bcx, expr::trans(bcx, expr));
207+
let datum = unpack_datum!(bcx, datum.to_rvalue_datum(bcx, "transmute_temp"));
208+
let val = if datum.kind.is_by_ref() {
209+
load_ty(bcx, datum.val, datum.ty)
210+
} else {
211+
datum.val
212+
};
213+
214+
let cast_val = BitCast(bcx, val, llret_ty);
215+
216+
match dest {
217+
expr::SaveIn(d) => {
218+
// this often occurs in a sequence like `Store(val,
219+
// d); val2 = Load(d)`, so disappears easily.
220+
Store(bcx, cast_val, d);
221+
}
222+
expr::Ignore => {}
223+
}
224+
dest
225+
} else {
226+
// The types are too complicated to do with a by-value
227+
// bitcast, so pointer cast instead. We need to cast the
228+
// dest so the types work out.
229+
let dest = match dest {
230+
expr::SaveIn(d) => expr::SaveIn(PointerCast(bcx, d, llintype.ptr_to())),
231+
expr::Ignore => expr::Ignore
232+
};
233+
bcx = expr::trans_into(bcx, &*arg_exprs[0], dest);
234+
dest
181235
};
182-
bcx = expr::trans_into(bcx, &*arg_exprs[0], dest);
183236

184237
fcx.pop_custom_cleanup_scope(cleanup_scope);
185238

0 commit comments

Comments
 (0)