[X86][SATCVT] Reduce MIN/MAXSS/D by conversion instruction result #136471
Conversation
@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes

CVTSD2SI returns INT_MIN/LONG_MIN when underflow happens. VCVTPS/D2UD/QQ returns INT_MAX/LONG_MAX when overflow happens. We can therefore eliminate one MINSS/D or MAXSS/D instruction by leveraging the result of the conversion instruction.

Partially fixes #136342

Patch is 38.48 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/136471.diff

5 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a4381b99dbae0..652baadd903dc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21851,6 +21851,15 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
"Expected saturation width smaller than result width");
+ // AVX512 provides VCVTPS/D2UD/QQ which return INT_MAX/LONG_MAX when overflow
+ // happens. X86ISD::FMAX makes sure negative value and NaN return 0.
+ if (Subtarget.hasAVX512() && !IsSigned && SatWidth == DstWidth &&
+ (DstVT == MVT::i32 || (Subtarget.is64Bit() && DstVT == MVT::i64))) {
+ SDValue MinFloatNode = DAG.getConstantFP(0.0, dl, SrcVT);
+ SDValue Clamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
+ return DAG.getNode(ISD::FP_TO_UINT, dl, DstVT, Clamped);
+ }
+
// Promote result of FP_TO_*INT to at least 32 bits.
if (TmpWidth < 32) {
TmpVT = MVT::i32;
@@ -21912,9 +21921,10 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
}
+ SDValue MinClamped = Src;
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
- SDValue MinClamped = DAG.getNode(
- X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
+ if (!IsSigned || SatWidth != DstWidth)
+ MinClamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
// Clamp by MaxFloat from above. NaN cannot occur.
SDValue BothClamped = DAG.getNode(
X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
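The change above rests on two hardware facts stated in the description: MAXSS/MAXSD return their second operand when either input is NaN (so FMAX with 0.0 maps both negative inputs and NaN to 0.0), and the AVX512 unsigned conversions saturate out-of-range inputs to all-ones. A minimal C++ sketch of the unsigned i32 case, written as a plain reference model under those assumptions (the helper names are made up for illustration and are not LLVM APIs):

    #include <cstdint>
    #include <limits>

    // MAXSS semantics: if either operand is NaN, the second operand is
    // returned, so maxss(x, 0.0f) maps negative values and NaN to 0.0f.
    static float x86_maxss(float a, float b) {
      return a > b ? a : b; // NaN compares false, so 'b' wins
    }

    // VCVTTSS2USI semantics for the non-negative inputs left after the clamp:
    // anything at or above 2^32 is saturated to UINT32_MAX.
    static uint32_t x86_vcvttss2usi(float x) {
      if (!(x < 4294967296.0f))
        return std::numeric_limits<uint32_t>::max();
      return static_cast<uint32_t>(x); // truncates toward zero
    }

    // llvm.fptoui.sat.i32.f32: one clamp instruction suffices; the MINSS
    // upper clamp emitted by the old lowering is now redundant.
    uint32_t fptoui_sat_i32_f32(float x) {
      return x86_vcvttss2usi(x86_maxss(x, 0.0f));
    }

The signed change is the mirror image: because CVTTSD2SI already produces INT_MIN/LONG_MIN for out-of-range inputs, the FMAX lower clamp can be dropped when the saturation width equals the destination width, leaving only the MINSD upper clamp plus the existing UCOMISD/CMOV sequence that zeroes the result for NaN (visible in the test updates below).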
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 3f5ec7b530fe0..0691da10b6f7e 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -6,11 +6,11 @@
define i32 @stest_f64i32(double %x) nounwind {
; CHECK-LABEL: stest_f64i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.147483647E+9,0.0E+0]
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: cvttsd2si %xmm1, %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ucomisd %xmm0, %xmm0
-; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cvttsd2si %xmm0, %ecx
; CHECK-NEXT: cmovnpl %ecx, %eax
; CHECK-NEXT: retq
entry:
@@ -619,11 +619,11 @@ entry:
define i32 @stest_f64i32_mm(double %x) nounwind {
; CHECK-LABEL: stest_f64i32_mm:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.147483647E+9,0.0E+0]
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: cvttsd2si %xmm1, %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ucomisd %xmm0, %xmm0
-; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cvttsd2si %xmm0, %ecx
; CHECK-NEXT: cmovnpl %ecx, %eax
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
index 85f4c945230e1..59d09121b654f 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
@@ -1453,21 +1453,21 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
; X86-SSE-LABEL: test_signed_i32_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = [2.147483647E+9,0.0E+0]
+; X86-SSE-NEXT: minsd %xmm0, %xmm1
+; X86-SSE-NEXT: cvttsd2si %xmm1, %ecx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
-; X86-SSE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i32_f64:
; X64: # %bb.0:
+; X64-NEXT: movsd {{.*#+}} xmm1 = [2.147483647E+9,0.0E+0]
+; X64-NEXT: minsd %xmm0, %xmm1
+; X64-NEXT: cvttsd2si %xmm1, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm0
-; X64-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: cvttsd2si %xmm0, %ecx
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: retq
%x = call i32 @llvm.fptosi.sat.i32.f64(double %f)
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
index 536a1ae3b918d..32402d0423ee6 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
@@ -418,21 +418,18 @@ define <2 x i16> @test_signed_v2i16_v2f64(<2 x double> %f) nounwind {
define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %f) nounwind {
; CHECK-LABEL: test_signed_v2i32_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm2 = [-2.147483648E+9,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = [2.147483647E+9,0.0E+0]
; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: maxsd %xmm2, %xmm1
-; CHECK-NEXT: movsd {{.*#+}} xmm3 = [2.147483647E+9,0.0E+0]
-; CHECK-NEXT: minsd %xmm3, %xmm1
+; CHECK-NEXT: minsd %xmm2, %xmm1
; CHECK-NEXT: cvttsd2si %xmm1, %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: ucomisd %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ecx, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: minsd %xmm0, %xmm2
+; CHECK-NEXT: cvttsd2si %xmm2, %eax
; CHECK-NEXT: ucomisd %xmm0, %xmm0
-; CHECK-NEXT: maxsd %xmm2, %xmm0
-; CHECK-NEXT: minsd %xmm3, %xmm0
-; CHECK-NEXT: cvttsd2si %xmm0, %eax
; CHECK-NEXT: cmovpl %ecx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
index 47dc3ca3616ea..931a1f161cdb7 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=i686-linux | FileCheck %s --check-prefix=X86-X87
; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
;
; 32-bit float to unsigned integer
@@ -77,6 +78,16 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind {
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i1_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i1 @llvm.fptoui.sat.i1.f32(float %f)
ret i1 %x
}
@@ -140,6 +151,16 @@ define i8 @test_unsigned_i8_f32(float %f) nounwind {
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i8_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = [2.55E+2,0.0E+0,0.0E+0,0.0E+0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i8 @llvm.fptoui.sat.i8.f32(float %f)
ret i8 %x
}
@@ -202,6 +223,16 @@ define i13 @test_unsigned_i13_f32(float %f) nounwind {
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i13_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = [8.191E+3,0.0E+0,0.0E+0,0.0E+0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i13 @llvm.fptoui.sat.i13.f32(float %f)
ret i13 %x
}
@@ -264,6 +295,16 @@ define i16 @test_unsigned_i16_f32(float %f) nounwind {
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i16_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = [6.5535E+4,0.0E+0,0.0E+0,0.0E+0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i16 @llvm.fptoui.sat.i16.f32(float %f)
ret i16 %x
}
@@ -323,6 +364,14 @@ define i19 @test_unsigned_i19_f32(float %f) nounwind {
; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i19_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: retq
%x = call i19 @llvm.fptoui.sat.i19.f32(float %f)
ret i19 %x
}
@@ -397,6 +446,13 @@ define i32 @test_unsigned_i32_f32(float %f) nounwind {
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i32_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; AVX512-NEXT: retq
%x = call i32 @llvm.fptoui.sat.i32.f32(float %f)
ret i32 %x
}
@@ -523,6 +579,18 @@ define i50 @test_unsigned_i50_f32(float %f) nounwind {
; X64-NEXT: movabsq $1125899906842623, %rax # imm = 0x3FFFFFFFFFFFF
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i50_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvttss2si %xmm0, %rax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovaeq %rax, %rcx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movabsq $1125899906842623, %rax # imm = 0x3FFFFFFFFFFFF
+; AVX512-NEXT: cmovbeq %rcx, %rax
+; AVX512-NEXT: retq
%x = call i50 @llvm.fptoui.sat.i50.f32(float %f)
ret i50 %x
}
@@ -652,6 +720,13 @@ define i64 @test_unsigned_i64_f32(float %f) nounwind {
; X64-NEXT: movq $-1, %rax
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i64_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NEXT: retq
%x = call i64 @llvm.fptoui.sat.i64.f32(float %f)
ret i64 %x
}
@@ -796,6 +871,26 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i100_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX512-NEXT: callq __fixunssfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
+; AVX512-NEXT: vucomiss %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: movabsq $68719476735, %rcx # imm = 0xFFFFFFFFF
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: retq
%x = call i100 @llvm.fptoui.sat.i100.f32(float %f)
ret i100 %x
}
@@ -934,6 +1029,25 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i128_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX512-NEXT: callq __fixunssfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
+; AVX512-NEXT: vucomiss %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: retq
%x = call i128 @llvm.fptoui.sat.i128.f32(float %f)
ret i128 %x
}
@@ -1012,6 +1126,16 @@ define i1 @test_unsigned_i1_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i1_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
+; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i1 @llvm.fptoui.sat.i1.f64(double %f)
ret i1 %x
}
@@ -1075,6 +1199,16 @@ define i8 @test_unsigned_i8_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i8_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = [2.55E+2,0.0E+0]
+; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i8 @llvm.fptoui.sat.i8.f64(double %f)
ret i8 %x
}
@@ -1137,6 +1271,16 @@ define i13 @test_unsigned_i13_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i13_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = [8.191E+3,0.0E+0]
+; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i13 @llvm.fptoui.sat.i13.f64(double %f)
ret i13 %x
}
@@ -1199,6 +1343,16 @@ define i16 @test_unsigned_i16_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i16_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = [6.5535E+4,0.0E+0]
+; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i16 @llvm.fptoui.sat.i16.f64(double %f)
ret i16 %x
}
@@ -1258,6 +1412,14 @@ define i19 @test_unsigned_i19_f64(double %f) nounwind {
; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i19_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: retq
%x = call i19 @llvm.fptoui.sat.i19.f64(double %f)
ret i19 %x
}
@@ -1325,6 +1487,13 @@ define i32 @test_unsigned_i32_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i32_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT: retq
%x = call i32 @llvm.fptoui.sat.i32.f64(double %f)
ret i32 %x
}
@@ -1447,6 +1616,14 @@ define i50 @test_unsigned_i50_f64(double %f) nounwind {
; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i50_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512-NEXT: retq
%x = call i50 @llvm.fptoui.sat.i50.f64(double %f)
ret i50 %x
}
@@ -1576,6 +1753,13 @@ define i64 @test_unsigned_i64_f64(double %f) nounwind {
; X64-NEXT: movq $-1, %rax
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i64_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512-NEXT: retq
%x = call i64 @llvm.fptoui.sat.i64.f64(double %f)
ret i64 %x
}
@@ -1720,6 +1904,26 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i100_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
+; AVX512-NEXT: callq __fixunsdfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovsd (%rsp), %xmm1 # 8-byte Reload
+; AVX512-NEXT: # xmm1 = mem[0],zero
+; AVX512-NEXT: vucomisd %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: movabsq $68719476735, %rcx # imm = 0xFFFFFFFFF
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: retq
%x = call i100 @llvm.fptoui.sat.i100.f64(double %f)
ret i100 %x
}
@@ -1858,6 +2062,25 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i128_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
+; AVX512-NEXT: callq __fixunsdfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovsd (%rsp), %xmm1 # 8-byte Reload
+; AVX512-NEXT: # xmm1 = mem[0],zero
+; AVX512-NEXT: vucomisd %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: retq
%x = call i128 @llvm.fptoui.sat.i128.f64(double %f)
ret i128 %x
}
@@ -1955,6 +2178,20 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind {
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i1_f16:
+; A...
[truncated]
Force-pushed from c0adfe6 to b433a73.
      (DstVT == MVT::i32 || (Subtarget.is64Bit() && DstVT == MVT::i64))) {
    SDValue MinFloatNode = DAG.getConstantFP(0.0, dl, SrcVT);
    SDValue Clamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
    return DAG.getNode(ISD::FP_TO_UINT, dl, DstVT, Clamped);
If this is relying on properties of the ultimately selected instruction, doesn't this need a custom ISD node?
I don't think so. FP_TO_UINT (f32/f64 -> i32/i64) is legal if and only if AVX512 is available, so we can consider it a 1:1 mapping between the node and the instruction. See the lowering code:
  if (!IsSigned && UseSSEReg) {
    // Conversions from f32/f64 with AVX512 should be legal.
    if (Subtarget.hasAVX512())
      return Op;
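In other words, the transform is only emitted when ISD::FP_TO_UINT is already Legal, so it is guaranteed to select straight to VCVTTSS2USI/VCVTTSD2USI, whose saturating overflow result is exactly what the lowering relies on. Restating that guard as a stand-alone predicate, purely for illustration (this is not LLVM API code, just a paraphrase of the condition added in LowerFP_TO_INT_SAT):

    // The hardware's own saturation can replace the MINSS/D upper clamp only
    // when the unsigned AVX512 conversion will actually be selected.
    bool canUseHardwareUnsignedSaturation(bool IsSigned, bool HasAVX512,
                                          bool Is64Bit, unsigned SatWidth,
                                          unsigned DstWidth) {
      if (IsSigned || !HasAVX512)
        return false; // VCVTT*2USI only exists with AVX512
      if (SatWidth != DstWidth)
        return false; // narrower saturation still needs the upper clamp
      return DstWidth == 32 || (Is64Bit && DstWidth == 64);
    }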