Skip to content

Commit 84b0266

Browse files
authored
DAG: Pass flags to FoldConstantArithmetic (#93663)
There is simply way too much going on inside getNode. The complicated constant folding for vector operations works by looking for build_vector operands, calling getNode on the scalar elements, and then checking whether the results are constants. As a side effect, this produces unused scalar operation nodes (previously without flags). If the vector operation is later scalarized, it finds the flagless constant-folding temporary and loses the flags. I don't think this is a reasonable way for constant folding to operate, but for now fix this by ensuring that the flags on the original operation are preserved in the temporary. This yields a clear code improvement for AMDGPU when f16 isn't legal. The Wasm cases switch from using a libcall to a compare and select. We are evidently missing the fcmp+select to fminimum/fmaximum handling, but this would be further improved once that's handled. AArch64 also avoids the fmaxl/fminl libcall, but the output looks worse and, for some reason, now calls a different routine (__gttf2/__lttf2).
1 parent 083a266 commit 84b0266

File tree

6 files changed

+202
-194
lines changed

6 files changed

+202
-194
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1893,7 +1893,8 @@ class SelectionDAG {
18931893
const SDNode *N2);
18941894

18951895
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
1896-
ArrayRef<SDValue> Ops);
1896+
ArrayRef<SDValue> Ops,
1897+
SDNodeFlags Flags = SDNodeFlags());
18971898

18981899
/// Fold floating-point operations when all operands are constants and/or
18991900
/// undefined.

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6333,7 +6333,8 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
63336333
}
63346334

63356335
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
6336-
EVT VT, ArrayRef<SDValue> Ops) {
6336+
EVT VT, ArrayRef<SDValue> Ops,
6337+
SDNodeFlags Flags) {
63376338
// If the opcode is a target-specific ISD node, there's nothing we can
63386339
// do here and the operand rules may not line up with the below, so
63396340
// bail early.
@@ -6690,7 +6691,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
66906691
}
66916692

66926693
// Constant fold the scalar operands.
6693-
SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
6694+
SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
66946695

66956696
// Legalize the (integer) scalar constant if necessary.
66966697
if (LegalSVT != SVT)
@@ -7261,7 +7262,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
72617262
}
72627263

72637264
// Perform trivial constant folding.
7264-
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
7265+
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
72657266
return SV;
72667267

72677268
// Canonicalize an UNDEF to the RHS, even over a constant.

llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,19 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
648648
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
649649
; CHECK-LABEL: test_v2f128:
650650
; CHECK: // %bb.0:
651-
; CHECK-NEXT: b fmaxl
651+
; CHECK-NEXT: sub sp, sp, #48
652+
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
653+
; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
654+
; CHECK-NEXT: bl __gttf2
655+
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
656+
; CHECK-NEXT: cmp w0, #0
657+
; CHECK-NEXT: b.le .LBB18_2
658+
; CHECK-NEXT: // %bb.1:
659+
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
660+
; CHECK-NEXT: .LBB18_2:
661+
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
662+
; CHECK-NEXT: add sp, sp, #48
663+
; CHECK-NEXT: ret
652664
%b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
653665
ret fp128 %b
654666
}

llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,19 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
648648
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
649649
; CHECK-LABEL: test_v2f128:
650650
; CHECK: // %bb.0:
651-
; CHECK-NEXT: b fminl
651+
; CHECK-NEXT: sub sp, sp, #48
652+
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
653+
; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
654+
; CHECK-NEXT: bl __lttf2
655+
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
656+
; CHECK-NEXT: cmp w0, #0
657+
; CHECK-NEXT: b.ge .LBB18_2
658+
; CHECK-NEXT: // %bb.1:
659+
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
660+
; CHECK-NEXT: .LBB18_2:
661+
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
662+
; CHECK-NEXT: add sp, sp, #48
663+
; CHECK-NEXT: ret
652664
%b = call nnan fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
653665
ret fp128 %b
654666
}

llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll

Lines changed: 48 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -654,21 +654,16 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
654654
; GFX7-LABEL: v_maximum_v2f16__nnan:
655655
; GFX7: ; %bb.0:
656656
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657+
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
657658
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
658659
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
659-
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
660660
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
661+
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
661662
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
662663
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
663-
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
664664
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
665-
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
666-
; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
667-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
668-
; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
669-
; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
670-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
671-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
665+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
666+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
672667
; GFX7-NEXT: s_setpc_b64 s[30:31]
673668
;
674669
; GFX8-LABEL: v_maximum_v2f16__nnan:
@@ -847,21 +842,16 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
847842
; GFX7-LABEL: v_maximum_v2f16__nnan_nsz:
848843
; GFX7: ; %bb.0:
849844
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
845+
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
850846
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
851847
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
852-
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
853848
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
849+
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
854850
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
855851
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
856-
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
857852
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
858-
; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000
859-
; GFX7-NEXT: v_max_f32_e32 v4, v0, v2
860-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2
861-
; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
862-
; GFX7-NEXT: v_max_f32_e32 v2, v1, v3
863-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3
864-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
853+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
854+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
865855
; GFX7-NEXT: s_setpc_b64 s[30:31]
866856
;
867857
; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
@@ -1216,28 +1206,21 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
12161206
; GFX7-LABEL: v_maximum_v3f16__nnan:
12171207
; GFX7: ; %bb.0:
12181208
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209+
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1210+
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1211+
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
12191212
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
12201213
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1221-
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
12221214
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1223-
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1224-
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1215+
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1216+
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
12251217
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
12261218
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1227-
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
12281219
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1229-
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
12301220
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1231-
; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
1232-
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1233-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1234-
; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1235-
; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
1236-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1237-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1238-
; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
1239-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1240-
; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1221+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1222+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1223+
; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
12411224
; GFX7-NEXT: s_setpc_b64 s[30:31]
12421225
;
12431226
; GFX8-LABEL: v_maximum_v3f16__nnan:
@@ -1427,28 +1410,21 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
14271410
; GFX7-LABEL: v_maximum_v3f16__nnan_nsz:
14281411
; GFX7: ; %bb.0:
14291412
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1413+
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1414+
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1415+
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
14301416
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
14311417
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1432-
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
14331418
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1434-
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1435-
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1419+
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1420+
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
14361421
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
14371422
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1438-
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
14391423
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1440-
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
14411424
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1442-
; GFX7-NEXT: v_max_f32_e32 v6, v0, v3
1443-
; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000
1444-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3
1445-
; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc
1446-
; GFX7-NEXT: v_max_f32_e32 v3, v1, v4
1447-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4
1448-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc
1449-
; GFX7-NEXT: v_max_f32_e32 v3, v2, v5
1450-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5
1451-
; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
1425+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
1426+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
1427+
; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
14521428
; GFX7-NEXT: s_setpc_b64 s[30:31]
14531429
;
14541430
; GFX8-LABEL: v_maximum_v3f16__nnan_nsz:
@@ -1671,35 +1647,26 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
16711647
; GFX7-LABEL: v_maximum_v4f16__nnan:
16721648
; GFX7: ; %bb.0:
16731649
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1650+
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1651+
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1652+
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1653+
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1654+
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
16741655
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
16751656
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1676-
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
16771657
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1678-
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1679-
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1680-
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1681-
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1658+
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1659+
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1660+
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
16821661
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
16831662
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1684-
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
16851663
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1686-
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
16871664
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1688-
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
16891665
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1690-
; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1691-
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1692-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1693-
; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1694-
; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1695-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1696-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1697-
; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1698-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1699-
; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1700-
; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1701-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1702-
; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1666+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1667+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1668+
; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1669+
; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
17031670
; GFX7-NEXT: s_setpc_b64 s[30:31]
17041671
;
17051672
; GFX8-LABEL: v_maximum_v4f16__nnan:
@@ -1924,35 +1891,26 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
19241891
; GFX7-LABEL: v_maximum_v4f16__nnan_nsz:
19251892
; GFX7: ; %bb.0:
19261893
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1894+
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1895+
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1896+
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1897+
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1898+
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
19271899
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
19281900
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1929-
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
19301901
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1931-
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1932-
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1933-
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
1934-
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1902+
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1903+
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1904+
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
19351905
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
19361906
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1937-
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
19381907
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1939-
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
19401908
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1941-
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
19421909
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1943-
; GFX7-NEXT: v_max_f32_e32 v8, v0, v4
1944-
; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000
1945-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4
1946-
; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc
1947-
; GFX7-NEXT: v_max_f32_e32 v4, v1, v5
1948-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5
1949-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc
1950-
; GFX7-NEXT: v_max_f32_e32 v4, v2, v6
1951-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6
1952-
; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc
1953-
; GFX7-NEXT: v_max_f32_e32 v4, v3, v7
1954-
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7
1955-
; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
1910+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
1911+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
1912+
; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
1913+
; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
19561914
; GFX7-NEXT: s_setpc_b64 s[30:31]
19571915
;
19581916
; GFX8-LABEL: v_maximum_v4f16__nnan_nsz:

0 commit comments

Comments
 (0)