From 505f03b517c99297bf25ca0eaa8598084e3750fa Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sat, 17 Jun 2023 02:27:24 -0700 Subject: [PATCH 1/2] Oh "fun", an ICE that doesn't happen locally --- compiler/rustc_expand/src/proc_macro_server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 2dc9b51a37ea0..8c5be81ddaf30 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -290,7 +290,7 @@ impl ToInternal> b'$' => Dollar, b'?' => Question, b'\'' => SingleQuote, - _ => unreachable!(), + _ => unreachable!("{ch} not expected in Punct"), }; smallvec![if joint { tokenstream::TokenTree::token_joint(kind, span) From 2627981b7eb4f9ce483fcbb7ccca340911988b71 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sat, 17 Jun 2023 01:52:00 -0700 Subject: [PATCH 2/2] Avoid `memcpy` in codegen for more types, notably `Vec` PR 111999 set up the framework to be able to do this; this PR expands it to more types than just arrays. Most interestingly, this allows it to work with `Vec` and `String`, so swapping those no longer ends up going through the stack like it does today. And since this is done in codegen, it's not special for `swap`, and thus will hopefully allow types like this to better optimize in lots of places, with easier SRoA. 
--- compiler/rustc_codegen_llvm/src/type_of.rs | 36 ++++++++-- tests/assembly/swap-strings.rs | 23 ++++++ tests/codegen/issues/issue-15953.rs | 13 ++-- tests/codegen/issues/issue-86106.rs | 25 ++++--- tests/codegen/loads.rs | 14 +++- tests/codegen/mem-replace-simple-type.rs | 15 +++- tests/codegen/packed.rs | 24 +++++-- .../simd-intrinsic-transmute-array.rs | 6 +- tests/codegen/swap-small-types.rs | 70 ++++++++++++++++--- tests/codegen/vec-in-place.rs | 58 +++++++++++++++ 10 files changed, 248 insertions(+), 36 deletions(-) create mode 100644 tests/assembly/swap-strings.rs diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs index 2be7bce115dd8..0d00cd3b8602c 100644 --- a/compiler/rustc_codegen_llvm/src/type_of.rs +++ b/compiler/rustc_codegen_llvm/src/type_of.rs @@ -407,7 +407,7 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> { // arrays but don't count as aggregate types if let FieldsShape::Array { count, .. } = self.layout.fields() && let element = self.field(cx, 0) - && element.ty.is_integral() + && element.ty.is_primitive() { // `cx.type_ix(bits)` is tempting here, but while that works great // for things that *stay* as memory-to-memory copies, it also ends @@ -418,8 +418,36 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> { return Some(cx.type_vector(ety, *count)); } - // FIXME: The above only handled integer arrays; surely more things - // would also be possible. Be careful about provenance, though! 
- None + // Ensure the type isn't too complex nor otherwise ineligible + is_scalar_copy_reasonable(4, self.ty, cx)?; + + // Otherwise we can load/store it via a long-enough integer type + Some(cx.type_ix(self.layout.size().bits())) + } +} + +fn is_scalar_copy_reasonable<'a, 'tcx>( + max_fields: u32, + t: Ty<'tcx>, + cx: &CodegenCx<'a, 'tcx>, +) -> Option { + if t.is_any_ptr() || t.is_primitive() { + return max_fields.checked_sub(1); + } + + match t.kind() { + ty::Tuple(field_tys) => field_tys + .into_iter() + .try_fold(max_fields, |mf, tt| is_scalar_copy_reasonable(mf, tt, cx)), + // Unions are magic and can carry anything, regardless of their field + // types, so force them to always go through `memcpy`. + ty::Adt(adt_def, _) if adt_def.is_union() => None, + // If there could be multiple variants, just use `memcpy` for now. + ty::Adt(adt_def, _) if adt_def.variants().len() != 1 => None, + ty::Adt(adt_def, substs) => adt_def.all_fields().try_fold(max_fields, |mf, field_def| { + let field_ty = field_def.ty(cx.tcx, substs); + is_scalar_copy_reasonable(mf, field_ty, cx) + }), + _ => None, } } diff --git a/tests/assembly/swap-strings.rs b/tests/assembly/swap-strings.rs new file mode 100644 index 0000000000000..3817769631eaf --- /dev/null +++ b/tests/assembly/swap-strings.rs @@ -0,0 +1,23 @@ +// assembly-output: emit-asm +// compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel +// only-x86_64 +// ignore-sgx +// ignore-debug + +// Ensure that the swap uses SIMD registers and does not go to stack. 
+ +// CHECK-LABEL: swap_strings_xmm: +#[no_mangle] +pub fn swap_strings_xmm(a: &mut String, b: &mut String) { + // CHECK-DAG: movups [[A1:xmm.+]], xmmword ptr [[AX:.+]] + // CHECK-DAG: mov [[A2:r.+]], qword ptr [[AQ:.+]] + // CHECK-DAG: movups [[B1:xmm.+]], xmmword ptr [[BX:.+]] + // CHECK-DAG: mov [[B2:r.+]], qword ptr [[BQ:.+]] + // CHECK-NOT: mov + // CHECK-DAG: movups xmmword ptr [[AX]], [[B1]] + // CHECK-DAG: mov qword ptr [[AQ]], [[B2]] + // CHECK-DAG: movups xmmword ptr [[BX]], [[A1]] + // CHECK-DAG: mov qword ptr [[BQ]], [[A2]] + // CHECK: ret + std::mem::swap(a, b); +} diff --git a/tests/codegen/issues/issue-15953.rs b/tests/codegen/issues/issue-15953.rs index 28d28428904f5..d8541c99be2dc 100644 --- a/tests/codegen/issues/issue-15953.rs +++ b/tests/codegen/issues/issue-15953.rs @@ -1,14 +1,17 @@ // Test that llvm generates `memcpy` for moving a value // inside a function and moving an argument. +#[derive(Default, Debug)] +struct RatherLargeType(usize, isize, usize, isize, usize, isize); + struct Foo { - x: Vec, + x: RatherLargeType, } #[inline(never)] #[no_mangle] // CHECK: memcpy -fn interior(x: Vec) -> Vec { +fn interior(x: RatherLargeType) -> RatherLargeType { let Foo { x } = Foo { x: x }; x } @@ -16,14 +19,14 @@ fn interior(x: Vec) -> Vec { #[inline(never)] #[no_mangle] // CHECK: memcpy -fn exterior(x: Vec) -> Vec { +fn exterior(x: RatherLargeType) -> RatherLargeType { x } fn main() { - let x = interior(Vec::new()); + let x = interior(RatherLargeType::default()); println!("{:?}", x); - let x = exterior(Vec::new()); + let x = exterior(RatherLargeType::default()); println!("{:?}", x); } diff --git a/tests/codegen/issues/issue-86106.rs b/tests/codegen/issues/issue-86106.rs index 15aef344ac0c9..f26512a04b029 100644 --- a/tests/codegen/issues/issue-86106.rs +++ b/tests/codegen/issues/issue-86106.rs @@ -2,14 +2,15 @@ // compile-flags: -C opt-level=3 -Z merge-functions=disabled // The below two functions ensure that both `String::new()` and `"".to_string()` 
-// produce the identical code. +// generate their values directly, rather than creating a constant and copying +// that constant (which takes more instructions because of PIC). #![crate_type = "lib"] // CHECK-LABEL: define {{(dso_local )?}}void @string_new #[no_mangle] pub fn string_new() -> String { - // CHECK: store ptr inttoptr + // CHECK: store {{i16|i32|i64}} 1, ptr %_0, // CHECK-NEXT: getelementptr // CHECK-NEXT: call void @llvm.memset // CHECK-NEXT: ret void @@ -19,10 +20,8 @@ pub fn string_new() -> String { // CHECK-LABEL: define {{(dso_local )?}}void @empty_to_string #[no_mangle] pub fn empty_to_string() -> String { - // CHECK: store ptr inttoptr - // CHECK-NEXT: getelementptr - // CHECK-NEXT: call void @llvm.memset - // CHECK-NEXT: ret void + // CHECK: store {{i48|i96|i192}} 1, ptr %_0, align {{2|4|8}} + // CHECK-NEXT: ret "".to_string() } @@ -32,7 +31,7 @@ pub fn empty_to_string() -> String { // CHECK-LABEL: @empty_vec #[no_mangle] pub fn empty_vec() -> Vec { - // CHECK: store ptr inttoptr + // CHECK: store ptr inttoptr ({{i16|i32|i64}} 1 to ptr), ptr %_0, // CHECK-NEXT: getelementptr // CHECK-NEXT: call void @llvm.memset // CHECK-NEXT: ret void @@ -42,9 +41,19 @@ pub fn empty_vec() -> Vec { // CHECK-LABEL: @empty_vec_clone #[no_mangle] pub fn empty_vec_clone() -> Vec { - // CHECK: store ptr inttoptr + // CHECK: store {{i16|i32|i64}} 1, ptr %_0, // CHECK-NEXT: getelementptr // CHECK-NEXT: call void @llvm.memset // CHECK-NEXT: ret void vec![].clone() } + +// CHECK-LABEL: @empty_vec_from_array +#[no_mangle] +pub fn empty_vec_from_array() -> Vec { + // CHECK: store ptr inttoptr ({{i16|i32|i64}} 1 to ptr), ptr %_0, + // CHECK-NEXT: getelementptr + // CHECK-NEXT: call void @llvm.memset + // CHECK-NEXT: ret void + [].into() +} diff --git a/tests/codegen/loads.rs b/tests/codegen/loads.rs index 4a09a1dc0339e..0d5c423c42eb4 100644 --- a/tests/codegen/loads.rs +++ b/tests/codegen/loads.rs @@ -136,12 +136,22 @@ pub fn small_array_alignment(x: [i8; 4]) -> [i8; 4] { 
x } -// CHECK-LABEL: small_struct_alignment +// CHECK-LABEL: i32 @small_struct_alignment(i32 %0) // The struct is loaded as i32, but its alignment is lower, go with 1 byte to avoid target // dependent alignment #[no_mangle] pub fn small_struct_alignment(x: Bytes) -> Bytes { - // CHECK: [[VAR:%[0-9]+]] = load i32, ptr %{{.*}}, align 1 + // CHECK: [[RETP:%.+]] = alloca %Bytes, align 1 + // CHECK: [[ALIGNED:%.+]] = alloca i32, align 4 + // CHECK: %x = alloca %Bytes, align 1 + + // CHECK: store i32 %0, ptr [[ALIGNED]], align 4 + // CHECK: call void @llvm.memcpy{{.+}}(ptr align 1 %x, ptr align 4 %1, i64 4, i1 false) + + // CHECK: [[TEMP:%[0-9]+]] = load i32, ptr %x, align 1 + // CHECK: store i32 [[TEMP]], ptr [[RETP]], align 1 + + // CHECK: [[VAR:%[0-9]+]] = load i32, ptr [[RETP]], align 1 // CHECK: ret i32 [[VAR]] x } diff --git a/tests/codegen/mem-replace-simple-type.rs b/tests/codegen/mem-replace-simple-type.rs index 174ac608e01b5..a9bc5752fe18b 100644 --- a/tests/codegen/mem-replace-simple-type.rs +++ b/tests/codegen/mem-replace-simple-type.rs @@ -37,8 +37,19 @@ pub fn replace_ref_str<'a>(r: &mut &'a str, v: &'a str) -> &'a str { pub fn replace_short_array(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] { // CHECK-NOT: alloca // CHECK: %[[R:.+]] = load <3 x i32>, ptr %r, align 4 - // CHECK: store <3 x i32> %[[R]], ptr %result + // CHECK: store <3 x i32> %[[R]], ptr %result, align 4 // CHECK: %[[V:.+]] = load <3 x i32>, ptr %v, align 4 - // CHECK: store <3 x i32> %[[V]], ptr %r + // CHECK: store <3 x i32> %[[V]], ptr %r, align 4 + std::mem::replace(r, v) +} + +#[no_mangle] +// CHECK-LABEL: @replace_string( +pub fn replace_string(r: &mut String, v: String) -> String { + // CHECK-NOT: alloca + // CHECK: %[[R:.+]] = load i192, ptr %r, align 8 + // CHECK: store i192 %[[R]], ptr %result, align 8 + // CHECK: %[[V:.+]] = load i192, ptr %v, align 8 + // CHECK: store i192 %[[V]], ptr %r, align 8 std::mem::replace(r, v) } diff --git a/tests/codegen/packed.rs 
b/tests/codegen/packed.rs index 96cd9a42e7dd6..c7ec03280c8e8 100644 --- a/tests/codegen/packed.rs +++ b/tests/codegen/packed.rs @@ -119,14 +119,22 @@ pub struct Packed2Pair(u8, u32); // CHECK-LABEL: @pkd1_pair #[no_mangle] pub fn pkd1_pair(pair1: &mut Packed1Pair, pair2: &mut Packed1Pair) { -// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 1 %{{.*}}, ptr align 1 %{{.*}}, i{{[0-9]+}} 5, i1 false) +// CHECK: [[ALLOCA:%.+]] = alloca %Packed1Pair, align 1 +// CHECK: [[TEMP1:%.+]] = load i40, ptr %pair1, align 1 +// CHECK: store i40 [[TEMP1]], ptr [[ALLOCA]], align 1 +// CHECK: [[TEMP2:%.+]] = load i40, ptr [[ALLOCA]], align 1 +// CHECK: store i40 [[TEMP2]], ptr %pair2, align 1 *pair2 = *pair1; } // CHECK-LABEL: @pkd2_pair #[no_mangle] pub fn pkd2_pair(pair1: &mut Packed2Pair, pair2: &mut Packed2Pair) { -// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 2 %{{.*}}, ptr align 2 %{{.*}}, i{{[0-9]+}} 6, i1 false) +// CHECK: [[ALLOCA:%.+]] = alloca %Packed2Pair, align 2 +// CHECK: [[TEMP1:%.+]] = load i48, ptr %pair1, align 2 +// CHECK: store i48 [[TEMP1]], ptr [[ALLOCA]], align 2 +// CHECK: [[TEMP2:%.+]] = load i48, ptr [[ALLOCA]], align 2 +// CHECK: store i48 [[TEMP2]], ptr %pair2, align 2 *pair2 = *pair1; } @@ -141,13 +149,21 @@ pub struct Packed2NestedPair((u32, u32)); // CHECK-LABEL: @pkd1_nested_pair #[no_mangle] pub fn pkd1_nested_pair(pair1: &mut Packed1NestedPair, pair2: &mut Packed1NestedPair) { -// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 1 %{{.*}}, ptr align 1 %{{.*}}, i{{[0-9]+}} 8, i1 false) +// CHECK: [[ALLOCA:%.+]] = alloca %Packed1NestedPair, align 1 +// CHECK: [[TEMP1:%.+]] = load i64, ptr %pair1, align 1 +// CHECK: store i64 [[TEMP1]], ptr [[ALLOCA]], align 1 +// CHECK: [[TEMP2:%.+]] = load i64, ptr [[ALLOCA]], align 1 +// CHECK: store i64 [[TEMP2]], ptr %pair2, align 1 *pair2 = *pair1; } // CHECK-LABEL: @pkd2_nested_pair #[no_mangle] pub fn pkd2_nested_pair(pair1: &mut Packed2NestedPair, pair2: &mut Packed2NestedPair) { -// CHECK: call void 
@llvm.memcpy.{{.*}}(ptr align 2 %{{.*}}, ptr align 2 %{{.*}}, i{{[0-9]+}} 8, i1 false) +// CHECK: [[ALLOCA:%.+]] = alloca %Packed2NestedPair, align 2 +// CHECK: [[TEMP1:%.+]] = load i64, ptr %pair1, align 2 +// CHECK: store i64 [[TEMP1]], ptr [[ALLOCA]], align 2 +// CHECK: [[TEMP2:%.+]] = load i64, ptr [[ALLOCA]], align 2 +// CHECK: store i64 [[TEMP2]], ptr %pair2, align 2 *pair2 = *pair1; } diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs index eb4ce307e70fb..39eee60c5e1b0 100644 --- a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs +++ b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs @@ -35,7 +35,8 @@ pub fn vector_align() -> usize { // CHECK-LABEL: @build_array_s #[no_mangle] pub fn build_array_s(x: [f32; 4]) -> S<4> { - // CHECK: call void @llvm.memcpy.{{.+}}({{.*}} align [[VECTOR_ALIGN]] {{.*}} align [[ARRAY_ALIGN]] {{.*}}, [[USIZE]] 16, i1 false) + // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]] + // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] S::<4>(x) } @@ -50,7 +51,8 @@ pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> { // CHECK-LABEL: @build_array_t #[no_mangle] pub fn build_array_t(x: [f32; 4]) -> T { - // CHECK: call void @llvm.memcpy.{{.+}}({{.*}} align [[VECTOR_ALIGN]] {{.*}} align [[ARRAY_ALIGN]] {{.*}}, [[USIZE]] 16, i1 false) + // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]] + // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] T(x) } diff --git a/tests/codegen/swap-small-types.rs b/tests/codegen/swap-small-types.rs index 419645a3fc6bc..7b081380bae21 100644 --- a/tests/codegen/swap-small-types.rs +++ b/tests/codegen/swap-small-types.rs @@ -6,6 +6,21 @@ use std::mem::swap; +// CHECK-LABEL: @swap_fat_ptrs +#[no_mangle] +pub fn swap_fat_ptrs<'a>(x: &mut &'a str, y: &mut &'a str) { + // CHECK-NOT: alloca + // CHECK: %[[X0:.+]] = load 
ptr, ptr %x, align 8 + // CHECK: %[[X1:.+]] = load i64, ptr %[[PX1:.+]], align 8 + // CHECK: %[[Y0:.+]] = load ptr, ptr %y, align 8 + // CHECK: %[[Y1:.+]] = load i64, ptr %[[PY1:.+]], align 8 + // CHECK: store ptr %[[Y0]], ptr %x, align 8 + // CHECK: store i64 %[[Y1]], ptr %[[PX1]], align 8 + // CHECK: store ptr %[[X0]], ptr %y, align 8 + // CHECK: store i64 %[[X1]], ptr %[[PY1]], align 8 + swap(x, y) +} + type RGB48 = [u16; 3]; // CHECK-LABEL: @swap_rgb48_manually( @@ -40,9 +55,9 @@ type RGB24 = [u8; 3]; // CHECK-LABEL: @swap_rgb24_slices #[no_mangle] pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i8> -// CHECK: store <{{[0-9]+}} x i8> + // CHECK-NOT: alloca + // CHECK: load <{{[0-9]+}} x i8> + // CHECK: store <{{[0-9]+}} x i8> if x.len() == y.len() { x.swap_with_slice(y); } @@ -51,12 +66,23 @@ pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) { // This one has a power-of-two size, so we iterate over it directly type RGBA32 = [u8; 4]; +// CHECK-LABEL: @swap_rgba32 +#[no_mangle] +pub fn swap_rgba32(x: &mut RGBA32, y: &mut RGBA32) { + // CHECK-NOT: alloca + // CHECK: load <4 x i8> + // CHECK: load <4 x i8> + // CHECK: store <4 x i8> + // CHECK: store <4 x i8> + swap(x, y) +} + // CHECK-LABEL: @swap_rgba32_slices #[no_mangle] pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i32> -// CHECK: store <{{[0-9]+}} x i32> + // CHECK-NOT: alloca + // CHECK: load <{{[0-9]+}} x i32> + // CHECK: store <{{[0-9]+}} x i32> if x.len() == y.len() { x.swap_with_slice(y); } @@ -69,10 +95,36 @@ const _: () = assert!(!std::mem::size_of::().is_power_of_two()); // CHECK-LABEL: @swap_string_slices #[no_mangle] pub fn swap_string_slices(x: &mut [String], y: &mut [String]) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i64> -// CHECK: store <{{[0-9]+}} x i64> + // CHECK-NOT: alloca + // CHECK: load <{{[0-9]+}} x i64> + // CHECK: store <{{[0-9]+}} 
x i64> if x.len() == y.len() { x.swap_with_slice(y); } } + +// It's wasteful to do three `memcpy`s when a `String` is just three fields. + +// CHECK-LABEL: @swap_strings +#[no_mangle] +pub fn swap_strings(x: &mut String, y: &mut String) { + // CHECK-NOT: alloca + // CHECK: load i192 + // CHECK: load i192 + // CHECK: store i192 + // CHECK: store i192 + swap(x, y) +} + +// CHECK-LABEL: @swap_tuple_with_padding +#[no_mangle] +pub fn swap_tuple_with_padding(x: &mut (u8, u32, u8), y: &mut (u8, u32, u8)) { + // CHECK-NOT: alloca + // CHECK: load i64 + // CHECK-NOT: noundef + // CHECK: load i64 + // CHECK-NOT: noundef + // CHECK: store i64 + // CHECK: store i64 + swap(x, y) +} diff --git a/tests/codegen/vec-in-place.rs b/tests/codegen/vec-in-place.rs index d68067ceb19f7..098a45ae498da 100644 --- a/tests/codegen/vec-in-place.rs +++ b/tests/codegen/vec-in-place.rs @@ -39,6 +39,16 @@ pub struct Baz { pub fn vec_iterator_cast_primitive(vec: Vec) -> Vec { // CHECK-NOT: loop // CHECK-NOT: call + + // Having an assume call is fine; that's not what this is trying to avoid + // CHECK: call void @llvm.assume + + // CHECK-NOT: loop + // CHECK-NOT: call + + // CHECK: ret + // CHECK-NEXT: } + vec.into_iter().map(|e| e as u8).collect() } @@ -47,6 +57,16 @@ pub fn vec_iterator_cast_primitive(vec: Vec) -> Vec { pub fn vec_iterator_cast_wrapper(vec: Vec) -> Vec> { // CHECK-NOT: loop // CHECK-NOT: call + + // Having an assume call is fine; that's not what this is trying to avoid + // CHECK: call void @llvm.assume + + // CHECK-NOT: loop + // CHECK-NOT: call + + // CHECK: ret + // CHECK-NEXT: } + vec.into_iter().map(|e| Wrapper(e)).collect() } @@ -55,6 +75,16 @@ pub fn vec_iterator_cast_wrapper(vec: Vec) -> Vec> { pub fn vec_iterator_cast_unwrap(vec: Vec>) -> Vec { // CHECK-NOT: loop // CHECK-NOT: call + + // Having an assume call is fine; that's not what this is trying to avoid + // CHECK: call void @llvm.assume + + // CHECK-NOT: loop + // CHECK-NOT: call + + // CHECK: ret + // CHECK-NEXT: 
} + vec.into_iter().map(|e| e.0).collect() } @@ -63,6 +93,16 @@ pub fn vec_iterator_cast_unwrap(vec: Vec>) -> Vec { pub fn vec_iterator_cast_aggregate(vec: Vec<[u64; 4]>) -> Vec { // CHECK-NOT: loop // CHECK-NOT: call + + // Having an assume call is fine; that's not what this is trying to avoid + // CHECK: call void @llvm.assume + + // CHECK-NOT: loop + // CHECK-NOT: call + + // CHECK: ret + // CHECK-NEXT: } + vec.into_iter().map(|e| unsafe { std::mem::transmute(e) }).collect() } @@ -72,6 +112,15 @@ pub fn vec_iterator_cast_deaggregate_tra(vec: Vec) -> Vec<[u64; 4]> { // CHECK-NOT: loop // CHECK-NOT: call + // Having an assume call is fine; that's not what this is trying to avoid + // CHECK: call void @llvm.assume + + // CHECK-NOT: loop + // CHECK-NOT: call + + // CHECK: ret + // CHECK-NEXT: } + // Safety: For the purpose of this test we assume that Bar layout matches [u64; 4]. // This currently is not guaranteed for repr(Rust) types, but it happens to work here and // the UCG may add additional guarantees for homogenous types in the future that would make this @@ -85,6 +134,15 @@ pub fn vec_iterator_cast_deaggregate_fold(vec: Vec) -> Vec<[u64; 4]> { // CHECK-NOT: loop // CHECK-NOT: call + // Having an assume call is fine; that's not what this is trying to avoid + // CHECK: call void @llvm.assume + + // CHECK-NOT: loop + // CHECK-NOT: call + + // CHECK: ret + // CHECK-NEXT: } + // Safety: For the purpose of this test we assume that Bar layout matches [u64; 4]. // This currently is not guaranteed for repr(Rust) types, but it happens to work here and // the UCG may add additional guarantees for homogenous types in the future that would make this