Skip to content

Commit c0adfe6

Browse files
committed
[X86][SATCVT] Reduce MIN/MAXSS/D by conversion instruction result
CVTSD2SI returns INT_MIN/LONG_MIN on underflow. VCVTPS/D2UD/QQ return INT_MAX/LONG_MAX on overflow. We can eliminate one MIN/MAXSS/D instruction by leveraging the result of the conversion instruction. Partially fixes #136342.
1 parent 0a17427 commit c0adfe6

File tree

5 files changed

+635
-17
lines changed

5 files changed

+635
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+21-7
Original file line numberDiff line numberDiff line change
@@ -21851,6 +21851,15 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
2185121851
assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
2185221852
"Expected saturation width smaller than result width");
2185321853

21854+
// AVX512 provides VCVTPS/D2UD/QQ which return INT_MAX/LONG_MAX when overflow
21855+
// happens. X86ISD::FMAX makes sure negative value and NaN return 0.
21856+
if (Subtarget.hasAVX512() && !IsSigned && SatWidth == DstWidth &&
21857+
(DstVT == MVT::i32 || (Subtarget.is64Bit() && DstVT == MVT::i64))) {
21858+
SDValue MinFloatNode = DAG.getConstantFP(0.0, dl, SrcVT);
21859+
SDValue Clamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
21860+
return DAG.getNode(ISD::FP_TO_UINT, dl, DstVT, Clamped);
21861+
}
21862+
2185421863
// Promote result of FP_TO_*INT to at least 32 bits.
2185521864
if (TmpWidth < 32) {
2185621865
TmpVT = MVT::i32;
@@ -21912,14 +21921,19 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
2191221921
return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
2191321922
}
2191421923

21915-
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
21916-
SDValue MinClamped = DAG.getNode(
21917-
X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
21918-
// Clamp by MaxFloat from above. NaN cannot occur.
21919-
SDValue BothClamped = DAG.getNode(
21920-
X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
21924+
SDValue MinClamped = Src;
21925+
// If Src is NaN, the result is MaxFloat.
21926+
unsigned MinOpc = X86ISD::FMIN; // If Src is NaN, the result is MaxFloat.
21927+
if (!IsSigned || SatWidth != DstWidth) {
21928+
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
21929+
MinClamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
21930+
// NaN cannot occur.
21931+
MinOpc = X86ISD::FMINC;
21932+
}
21933+
// Clamp by MaxFloat from above.
21934+
SDValue Clamped = DAG.getNode(MinOpc, dl, SrcVT, MinClamped, MaxFloatNode);
2192121935
// Convert clamped value to integer.
21922-
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
21936+
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, Clamped);
2192321937

2192421938
if (!IsSigned) {
2192521939
// In the unsigned case we're done, because we mapped NaN to MinFloat,

llvm/test/CodeGen/X86/fpclamptosat.ll

-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ define i32 @stest_f64i32(double %x) nounwind {
88
; CHECK: # %bb.0: # %entry
99
; CHECK-NEXT: xorl %eax, %eax
1010
; CHECK-NEXT: ucomisd %xmm0, %xmm0
11-
; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1211
; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1312
; CHECK-NEXT: cvttsd2si %xmm0, %ecx
1413
; CHECK-NEXT: cmovnpl %ecx, %eax
@@ -621,7 +620,6 @@ define i32 @stest_f64i32_mm(double %x) nounwind {
621620
; CHECK: # %bb.0: # %entry
622621
; CHECK-NEXT: xorl %eax, %eax
623622
; CHECK-NEXT: ucomisd %xmm0, %xmm0
624-
; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
625623
; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
626624
; CHECK-NEXT: cvttsd2si %xmm0, %ecx
627625
; CHECK-NEXT: cmovnpl %ecx, %eax

llvm/test/CodeGen/X86/fptosi-sat-scalar.ll

-2
Original file line numberDiff line numberDiff line change
@@ -1455,7 +1455,6 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
14551455
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
14561456
; X86-SSE-NEXT: xorl %eax, %eax
14571457
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
1458-
; X86-SSE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
14591458
; X86-SSE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
14601459
; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx
14611460
; X86-SSE-NEXT: cmovnpl %ecx, %eax
@@ -1465,7 +1464,6 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
14651464
; X64: # %bb.0:
14661465
; X64-NEXT: xorl %eax, %eax
14671466
; X64-NEXT: ucomisd %xmm0, %xmm0
1468-
; X64-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14691467
; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14701468
; X64-NEXT: cvttsd2si %xmm0, %ecx
14711469
; X64-NEXT: cmovnpl %ecx, %eax

llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll

+3-6
Original file line numberDiff line numberDiff line change
@@ -418,20 +418,17 @@ define <2 x i16> @test_signed_v2i16_v2f64(<2 x double> %f) nounwind {
418418
define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %f) nounwind {
419419
; CHECK-LABEL: test_signed_v2i32_v2f64:
420420
; CHECK: # %bb.0:
421-
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [-2.147483648E+9,0.0E+0]
421+
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [2.147483647E+9,0.0E+0]
422422
; CHECK-NEXT: movapd %xmm0, %xmm1
423-
; CHECK-NEXT: maxsd %xmm2, %xmm1
424-
; CHECK-NEXT: movsd {{.*#+}} xmm3 = [2.147483647E+9,0.0E+0]
425-
; CHECK-NEXT: minsd %xmm3, %xmm1
423+
; CHECK-NEXT: minsd %xmm2, %xmm1
426424
; CHECK-NEXT: cvttsd2si %xmm1, %eax
427425
; CHECK-NEXT: xorl %ecx, %ecx
428426
; CHECK-NEXT: ucomisd %xmm0, %xmm0
429427
; CHECK-NEXT: cmovpl %ecx, %eax
430428
; CHECK-NEXT: movd %eax, %xmm1
431429
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
432430
; CHECK-NEXT: ucomisd %xmm0, %xmm0
433-
; CHECK-NEXT: maxsd %xmm2, %xmm0
434-
; CHECK-NEXT: minsd %xmm3, %xmm0
431+
; CHECK-NEXT: minsd %xmm2, %xmm0
435432
; CHECK-NEXT: cvttsd2si %xmm0, %eax
436433
; CHECK-NEXT: cmovpl %ecx, %eax
437434
; CHECK-NEXT: movd %eax, %xmm0

0 commit comments

Comments
 (0)