Skip to content

Commit b2fba9a

Browse files
committed
Revert "Use nbdd0121 suggestion for reducing the perf impact"
This reverts commit 2e0cf271285089316db55b995312712638126245.
1 parent 1be1d4a commit b2fba9a

File tree

3 files changed

+14 additions, -15 deletions

compiler/rustc_middle/src/ty/layout.rs

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3375,18 +3375,20 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
3375 3375   // Pass and return structures up to 2 pointers in size by value,
3376 3376   // matching `ScalarPair`. LLVM will usually pass these in 2 registers
3377 3377   // which is more efficient than by-ref.
3378      - let ptr_size = Pointer.size(self);
3379      - let max_by_val_size = ptr_size * 2;
     3378 + let max_by_val_size = Pointer.size(self) * 2;
3380 3379   let size = arg.layout.size;
3381 3380
3382 3381   if arg.layout.is_unsized() || size > max_by_val_size {
3383 3382       arg.make_indirect();
3384      - } else if size > ptr_size && self.has_all_float(&arg.layout) {
     3383 + } else if self.has_all_float(&arg.layout) {
3385 3384       // We don't want to aggregate floats as an aggregates of Integer
3386      -     // because this will hurt the generated assembly (#93490) but as an
3387      -     // optimization we want to pass homogeneous aggregate of floats
3388      -     // greater than pointer size as indirect.
3389      -     arg.make_indirect();
     3385 +     // because this will hurt the generated assembly (#93490)
     3386 +     //
     3387 +     // As an optimization we want to pass homogeneous aggregate of floats
     3388 +     // greater than pointer size as indirect
     3389 +     if size > Pointer.size(self) {
     3390 +         arg.make_indirect();
     3391 +     }
3390 3392   } else {
3391 3393       // We want to pass small aggregates as immediates, but using
3392 3394       // a LLVM aggregate type for this leads to bad optimizations,

src/test/assembly/x86-64-homogenous-floats.rs

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,12 @@ pub fn sum_f32(a: f32, b: f32) -> f32 {
15 15       a + b
16 16   }
17 17
18    - // CHECK-LABEL: sum_f64x2:
19    - // CHECK: mov rax, [[PTR_IN:.*]]
20    - // CHECK-NEXT: movupd [[XMMA:xmm[0-9]]], xmmword ptr [rsi]
21    - // CHECK-NEXT: movupd [[XMMB:xmm[0-9]]], xmmword ptr [rdx]
22    - // CHECK-NEXT: addpd [[XMMB]], [[XMMA]]
23    - // CHECK-NEXT: movupd xmmword ptr {{\[}}[[PTR_IN]]{{\]}}, [[XMMB]]
   18 + // CHECK-LABEL: sum_f32x2:
   19 + // CHECK: addss xmm{{[0-9]}}, xmm{{[0-9]}}
   20 + // CHECK-NEXT: addss xmm{{[0-9]}}, xmm{{[0-9]}}
24 21   // CHECK-NEXT: ret
25 22   #[no_mangle]
26    - pub fn sum_f64x2(a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
   23 + pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
27 24       [
28 25           a[0] + b[0],
29 26           a[1] + b[1],

src/test/codegen/homogeneous-floats.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ pub struct Foo {
13 13       bar4: f32,
14 14   }
15 15
16    - // CHECK: define i64 @array_f32x2(i64 %0, i64 %1)
   16 + // CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1)
17 17   #[no_mangle]
18 18   pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
19 19       todo!()

0 commit comments

Comments
 (0)