Skip to content

Commit f1c72be

Browse files
committed
Use nbdd0121's suggestion to reduce the perf impact
1 parent 9ed05ed commit f1c72be

File tree

3 files changed

+15
-14
lines changed

3 files changed

+15
-14
lines changed

compiler/rustc_middle/src/ty/layout.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3375,20 +3375,18 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
33753375
// Pass and return structures up to 2 pointers in size by value,
33763376
// matching `ScalarPair`. LLVM will usually pass these in 2 registers
33773377
// which is more efficient than by-ref.
3378-
let max_by_val_size = Pointer.size(self) * 2;
3378+
let ptr_size = Pointer.size(self);
3379+
let max_by_val_size = ptr_size * 2;
33793380
let size = arg.layout.size;
33803381

33813382
if arg.layout.is_unsized() || size > max_by_val_size {
33823383
arg.make_indirect();
3383-
} else if unlikely!(self.has_all_float(&arg.layout)) {
3384+
} else if size > ptr_size && unlikely!(self.has_all_float(&arg.layout)) {
33843385
// We don't want to pass aggregates of floats as aggregates of integers
3385-
// because this will hurt the generated assembly (#93490)
3386-
//
3387-
// As an optimization we want to pass homogeneous aggregate of floats
3388-
// greater than pointer size as indirect
3389-
if size > Pointer.size(self) {
3390-
arg.make_indirect();
3391-
}
3386+
// because this will hurt the generated assembly (#93490) but as an
3387+
// optimization we want to pass homogeneous aggregates of floats
3388+
// greater than pointer size as indirect.
3389+
arg.make_indirect();
33923390
} else {
33933391
// We want to pass small aggregates as immediates, but using
33943392
// a LLVM aggregate type for this leads to bad optimizations,

src/test/assembly/x86-64-homogenous-floats.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,15 @@ pub fn sum_f32(a: f32, b: f32) -> f32 {
1515
a + b
1616
}
1717

18-
// CHECK-LABEL: sum_f32x2:
19-
// CHECK: addss xmm{{[0-9]}}, xmm{{[0-9]}}
20-
// CHECK-NEXT: addss xmm{{[0-9]}}, xmm{{[0-9]}}
18+
// CHECK-LABEL: sum_f64x2:
19+
// CHECK: mov rax, [[PTR_IN:.*]]
20+
// CHECK-NEXT: movupd [[XMMA:xmm[0-9]]], xmmword ptr [rsi]
21+
// CHECK-NEXT: movupd [[XMMB:xmm[0-9]]], xmmword ptr [rdx]
22+
// CHECK-NEXT: addpd [[XMMB]], [[XMMA]]
23+
// CHECK-NEXT: movupd xmmword ptr {{\[}}[[PTR_IN]]{{\]}}, [[XMMB]]
2124
// CHECK-NEXT: ret
2225
#[no_mangle]
23-
pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
26+
pub fn sum_f64x2(a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
2427
[
2528
a[0] + b[0],
2629
a[1] + b[1],

src/test/codegen/homogeneous-floats.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub struct Foo {
1313
bar4: f32,
1414
}
1515

16-
// CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1)
16+
// CHECK: define i64 @array_f32x2(i64 %0, i64 %1)
1717
#[no_mangle]
1818
pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
1919
todo!()

0 commit comments

Comments
 (0)